1 /** \file
2 * Implementation of the ANTLR3 string and string factory classes
3 */
4
5 // [The "BSD licence"]
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7 // http://www.temporal-wave.com
8 // http://www.linkedin.com/in/jimidle
9 //
10 // All rights reserved.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions
14 // are met:
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 // 3. The name of the author may not be used to endorse or promote products
21 // derived from this software without specific prior written permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34 #include <antlr3string.h>
35
36 /* Factory API
37 */
38 static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
39 static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory);
40 static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41 static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42 static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43 static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44 static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45 static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46 static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47 static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48 static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49 static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50 static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51 static void closeFactory(pANTLR3_STRING_FACTORY factory);
52
53 /* String API
54 */
55 static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
56 static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars);
57 static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars);
58 static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
59 static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit);
60 static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit);
61 static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62 static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63 static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64
65 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
66 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
67 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68
69 static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
70 static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
71 static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
72 static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i);
73 static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74 static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75
76 static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
77 static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr);
78 static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79 static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
80 static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
81 static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
82 static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83 static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84 static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
85 static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string);
86 static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
87 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string);
88 static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
89 static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string);
90
91 /* Local helpers
92 */
93 static void stringInit8 (pANTLR3_STRING string);
94 static void stringInitUTF16 (pANTLR3_STRING string);
95 static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
96
97 ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99 {
100 pANTLR3_STRING_FACTORY factory;
101
102 /* Allocate memory
103 */
104 factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105
106 if (factory == NULL)
107 {
108 return NULL;
109 }
110
111 /* Now we make a new list to track the strings.
112 */
113 factory->strings = antlr3VectorNew(0);
114 factory->index = 0;
115
116 if (factory->strings == NULL)
117 {
118 ANTLR3_FREE(factory);
119 return NULL;
120 }
121
122 // Install the API
123 //
124 // TODO: These encodings need equivalent functions to
125 // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126 // The STRING stuff was intended as a quick and dirty hack for people that did not
127 // want to worry about memory and performance very much, but nobody ever reads the
128 // notes or comments or uses the email list search. I want to discourage using these
129 // interfaces as it is much more efficient to use the pointers within the tokens
130 // directly, so I am not implementing the string stuff for the newer encodings.
131 // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132 // will not be useful beyond returning the text.
133 //
134 switch(encoding)
135 {
136 case ANTLR3_ENC_UTF32:
137 break;
138
139 case ANTLR3_ENC_UTF32BE:
140 break;
141
142 case ANTLR3_ENC_UTF32LE:
143 break;
144
145 case ANTLR3_ENC_UTF16BE:
146 case ANTLR3_ENC_UTF16LE:
147 case ANTLR3_ENC_UTF16:
148
149 factory->newRaw = newRawUTF16;
150 factory->newSize = newSizeUTF16;
151 factory->newPtr = newPtrUTF16_UTF16;
152 factory->newPtr8 = newPtrUTF16_8;
153 factory->newStr = newStrUTF16_UTF16;
154 factory->newStr8 = newStrUTF16_8;
155 factory->printable = printableUTF16;
156 factory->destroy = destroy;
157 factory->close = closeFactory;
158 break;
159
160 case ANTLR3_ENC_UTF8:
161 case ANTLR3_ENC_EBCDIC:
162 case ANTLR3_ENC_8BIT:
163 default:
164
165 factory->newRaw = newRaw8;
166 factory->newSize = newSize8;
167 factory->newPtr = newPtr8;
168 factory->newPtr8 = newPtr8;
169 factory->newStr = newStr8;
170 factory->newStr8 = newStr8;
171 factory->printable = printable8;
172 factory->destroy = destroy;
173 factory->close = closeFactory;
174 break;
175 }
176 return factory;
177 }
178
179
180 /**
181 *
182 * \param factory
183 * \return
184 */
185 static pANTLR3_STRING
newRaw8(pANTLR3_STRING_FACTORY factory)186 newRaw8 (pANTLR3_STRING_FACTORY factory)
187 {
188 pANTLR3_STRING string;
189
190 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191
192 if (string == NULL)
193 {
194 return NULL;
195 }
196
197 /* Structure is allocated, now fill in the API etc.
198 */
199 stringInit8(string);
200 string->factory = factory;
201
202 /* Add the string into the allocated list
203 */
204 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205 string->index = factory->index++;
206
207 return string;
208 }
209 /**
210 *
211 * \param factory
212 * \return
213 */
214 static pANTLR3_STRING
newRawUTF16(pANTLR3_STRING_FACTORY factory)215 newRawUTF16 (pANTLR3_STRING_FACTORY factory)
216 {
217 pANTLR3_STRING string;
218
219 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220
221 if (string == NULL)
222 {
223 return NULL;
224 }
225
226 /* Structure is allocated, now fill in the API etc.
227 */
228 stringInitUTF16(string);
229 string->factory = factory;
230
231 /* Add the string into the allocated list
232 */
233 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234 string->index = factory->index++;
235
236 return string;
237 }
238 static
stringFree(pANTLR3_STRING string)239 void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
240 {
241 /* First free the string itself if there was anything in it
242 */
243 if (string->chars)
244 {
245 ANTLR3_FREE(string->chars);
246 }
247
248 /* Now free the space for this string
249 */
250 ANTLR3_FREE(string);
251
252 return;
253 }
254 /**
255 *
256 * \param string
257 * \return
258 */
259 static void
stringInit8(pANTLR3_STRING string)260 stringInit8 (pANTLR3_STRING string)
261 {
262 string->len = 0;
263 string->size = 0;
264 string->chars = NULL;
265 string->encoding = ANTLR3_ENC_8BIT ;
266
267 /* API for 8 bit strings*/
268
269 string->set = set8;
270 string->set8 = set8;
271 string->append = append8;
272 string->append8 = append8;
273 string->insert = insert8;
274 string->insert8 = insert8;
275 string->addi = addi8;
276 string->inserti = inserti8;
277 string->addc = addc8;
278 string->charAt = charAt8;
279 string->compare = compare8;
280 string->compare8 = compare8;
281 string->subString = subString8;
282 string->toInt32 = toInt32_8;
283 string->to8 = to8_8;
284 string->toUTF8 = toUTF8_8;
285 string->compareS = compareS;
286 string->setS = setS;
287 string->appendS = appendS;
288 string->insertS = insertS;
289
290 }
291 /**
292 *
293 * \param string
294 * \return
295 */
296 static void
stringInitUTF16(pANTLR3_STRING string)297 stringInitUTF16 (pANTLR3_STRING string)
298 {
299 string->len = 0;
300 string->size = 0;
301 string->chars = NULL;
302 string->encoding = ANTLR3_ENC_8BIT;
303
304 /* API for UTF16 strings */
305
306 string->set = setUTF16_UTF16;
307 string->set8 = setUTF16_8;
308 string->append = appendUTF16_UTF16;
309 string->append8 = appendUTF16_8;
310 string->insert = insertUTF16_UTF16;
311 string->insert8 = insertUTF16_8;
312 string->addi = addiUTF16;
313 string->inserti = insertiUTF16;
314 string->addc = addcUTF16;
315 string->charAt = charAtUTF16;
316 string->compare = compareUTF16_UTF16;
317 string->compare8 = compareUTF16_8;
318 string->subString = subStringUTF16;
319 string->toInt32 = toInt32_UTF16;
320 string->to8 = to8_UTF16;
321 string->toUTF8 = toUTF8_UTF16;
322
323 string->compareS = compareS;
324 string->setS = setS;
325 string->appendS = appendS;
326 string->insertS = insertS;
327 }
328 /**
329 *
330 * \param string
331 * \return
332 * TODO: Implement UTF-8
333 */
334 static void
stringInitUTF8(pANTLR3_STRING string)335 stringInitUTF8 (pANTLR3_STRING string)
336 {
337 string->len = 0;
338 string->size = 0;
339 string->chars = NULL;
340
341 /* API */
342
343 }
344
345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346 // a memcpy as we make no assumptions about the 8 bit encoding.
347 //
348 static pANTLR3_STRING
toUTF8_8(pANTLR3_STRING string)349 toUTF8_8 (pANTLR3_STRING string)
350 {
351 return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352 }
353
354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org
355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358 //
359 static pANTLR3_STRING
toUTF8_UTF16(pANTLR3_STRING string)360 toUTF8_UTF16 (pANTLR3_STRING string)
361 {
362
363 UTF8 * outputEnd;
364 UTF16 * inputEnd;
365 pANTLR3_STRING utf8String;
366
367 ConversionResult cResult;
368
369 // Allocate the output buffer, which needs to accommodate potentially
370 // 3X (in bytes) the input size (in chars).
371 //
372 utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373
374 if (utf8String != NULL)
375 {
376 // Free existing allocation
377 //
378 ANTLR3_FREE(utf8String->chars);
379
380 // Reallocate according to maximum expected size
381 //
382 utf8String->size = string->len *3;
383 utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384
385 if (utf8String->chars != NULL)
386 {
387 inputEnd = (UTF16 *) (string->chars);
388 outputEnd = (UTF8 *) (utf8String->chars);
389
390 // Call the Unicode converter
391 //
392 cResult = ConvertUTF16toUTF8
393 (
394 (const UTF16**)&inputEnd,
395 ((const UTF16 *)(string->chars)) + string->len,
396 &outputEnd,
397 outputEnd + utf8String->size - 1,
398 lenientConversion
399 );
400
401 // We don't really care if things failed or not here, we just converted
402 // everything that was vaguely possible and stopped when it wasn't. It is
403 // up to the grammar programmer to verify that the input is sensible.
404 //
405 utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406
407 *(outputEnd+1) = '\0'; // Always null terminate
408 }
409 }
410 return utf8String;
411 }
412
413 /**
414 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415 *
416 * \param[in] factory - Pointer to the string factory that owns strings
417 * \param[in] size - In characters
418 * \return pointer to the new string.
419 */
420 static pANTLR3_STRING
newSize8(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)421 newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422 {
423 pANTLR3_STRING string;
424
425 string = factory->newRaw(factory);
426
427 if (string == NULL)
428 {
429 return string;
430 }
431
432 /* Always add one more byte for a terminator ;-)
433 */
434 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435 if (string->chars != NULL)
436 {
437 *(string->chars) = '\0';
438 string->size = size + 1;
439 }
440
441 return string;
442 }
443 /**
444 * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
445 *
446 * \param[in] factory - Pointer to the string factory that owns strings
447 * \param[in] size - In characters (count double for surrogate pairs!!!)
448 * \return pointer to the new string.
449 */
450 static pANTLR3_STRING
newSizeUTF16(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)451 newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
452 {
453 pANTLR3_STRING string;
454
455 string = factory->newRaw(factory);
456
457 if (string == NULL)
458 {
459 return string;
460 }
461
462 /* Always add one more byte for a terminator ;-)
463 */
464 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
465 if (string->chars != NULL)
466 {
467 *(string->chars) = '\0';
468 string->size = size+1; /* Size is always in characters, as is len */
469 }
470
471 return string;
472 }
473
474 /** Creates a new 8 bit string initialized with the 8 bit characters at the
475 * supplied ptr, of pre-determined size.
476 * \param[in] factory - Pointer to the string factory that owns the strings
477 * \param[in] ptr - Pointer to 8 bit encoded characters
478 * \return pointer to the new string
479 */
480 static pANTLR3_STRING
newPtr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)481 newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
482 {
483 pANTLR3_STRING string;
484
485 string = factory->newSize(factory, size);
486
487 if (string == NULL)
488 {
489 return NULL;
490 }
491
492 if (size <= 0)
493 {
494 return string;
495 }
496
497 if (ptr != NULL)
498 {
499 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
500 *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */
501 string->len = size;
502 }
503
504 return string;
505 }
506
507 /** Creates a new UTF16 string initialized with the 8 bit characters at the
508 * supplied 8 bit character ptr, of pre-determined size.
509 * \param[in] factory - Pointer to the string factory that owns the strings
510 * \param[in] ptr - Pointer to 8 bit encoded characters
511 * \return pointer to the new string
512 */
513 static pANTLR3_STRING
newPtrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)514 newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
515 {
516 pANTLR3_STRING string;
517
518 /* newSize accepts size in characters, not bytes
519 */
520 string = factory->newSize(factory, size);
521
522 if (string == NULL)
523 {
524 return NULL;
525 }
526
527 if (size <= 0)
528 {
529 return string;
530 }
531
532 if (ptr != NULL)
533 {
534 pANTLR3_UINT16 out;
535 ANTLR3_INT32 inSize;
536
537 out = (pANTLR3_UINT16)(string->chars);
538 inSize = size;
539
540 while (inSize-- > 0)
541 {
542 *out++ = (ANTLR3_UINT16)(*ptr++);
543 }
544
545 /* Terminate, these strings are usually used for Token streams and printing etc.
546 */
547 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
548
549 string->len = size;
550 }
551
552 return string;
553 }
554
555 /** Creates a new UTF16 string initialized with the UTF16 characters at the
556 * supplied ptr, of pre-determined size.
557 * \param[in] factory - Pointer to the string factory that owns the strings
558 * \param[in] ptr - Pointer to UTF16 encoded characters
559 * \return pointer to the new string
560 */
561 static pANTLR3_STRING
newPtrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)562 newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
563 {
564 pANTLR3_STRING string;
565
566 string = factory->newSize(factory, size);
567
568 if (string == NULL)
569 {
570 return NULL;
571 }
572
573 if (size <= 0)
574 {
575 return string;
576 }
577
578 if (ptr != NULL)
579 {
580 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
581
582 /* Terminate, these strings are usually used for Token streams and printing etc.
583 */
584 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
585 string->len = size;
586 }
587
588 return string;
589 }
590
591 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
592 * \param[in] factory - Pointer to the string factory that owns strings.
593 * \param[in] ptr - Pointer to the 8 bit encoded string
594 * \return Pointer to the newly initialized string
595 */
596 static pANTLR3_STRING
newStr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)597 newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
598 {
599 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
600 }
601
602 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
603 * \param[in] factory - Pointer to the string factory that owns strings.
604 * \param[in] ptr - Pointer to the 8 bit encoded string
605 * \return POinter to the newly initialized string
606 */
607 static pANTLR3_STRING
newStrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)608 newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
609 {
610 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
611 }
612
613 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
614 * \param[in] factory - Pointer to the string factory that owns strings.
615 * \param[in] ptr - Pointer to the UTF16 encoded string
616 * \return Pointer to the newly initialized string
617 */
618 static pANTLR3_STRING
newStrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)619 newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
620 {
621 pANTLR3_UINT16 in;
622 ANTLR3_UINT32 count;
623
624 /** First, determine the length of the input string
625 */
626 in = (pANTLR3_UINT16)ptr;
627 count = 0;
628
629 while (*in++ != '\0')
630 {
631 count++;
632 }
633 return factory->newPtr(factory, ptr, count);
634 }
635
636 static void
destroy(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING string)637 destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
638 {
639 // Record which string we are deleting
640 //
641 ANTLR3_UINT32 strIndex = string->index;
642
643 // Ensure that the string was not factory made, or we would try
644 // to delete memory that wasn't allocated outside the factory
645 // block.
646 // Remove the specific indexed string from the vector
647 //
648 factory->strings->del(factory->strings, strIndex);
649
650 // One less string in the vector, so decrement the factory index
651 // so that the next string allocated is indexed correctly with
652 // respect to the vector.
653 //
654 factory->index--;
655
656 // Now we have to reindex the strings in the vector that followed
657 // the one we just deleted. We only do this if the one we just deleted
658 // was not the last one.
659 //
660 if (strIndex< factory->index)
661 {
662 // We must reindex the strings after the one we just deleted.
663 // The one that follows the one we just deleted is also out
664 // of whack, so we start there.
665 //
666 ANTLR3_UINT32 i;
667
668 for (i = strIndex; i < factory->index; i++)
669 {
670 // Renumber the entry
671 //
672 ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
673 }
674 }
675
676 // The string has been destroyed and the elements of the factory are reindexed.
677 //
678
679 }
680
681 static pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)682 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
683 {
684 pANTLR3_STRING string;
685
686 /* We don't need to be too efficient here, this is mostly for error messages and so on.
687 */
688 pANTLR3_UINT8 scannedText;
689 ANTLR3_UINT32 i;
690
691 /* Assume we need as much as twice as much space to parse out the control characters
692 */
693 string = factory->newSize(factory, instr->len *2 + 1);
694
695 /* Scan through and replace unprintable (in terms of this routine)
696 * characters
697 */
698 scannedText = string->chars;
699
700 for (i = 0; i < instr->len; i++)
701 {
702 if (*(instr->chars + i) == '\n')
703 {
704 *scannedText++ = '\\';
705 *scannedText++ = 'n';
706 }
707 else if (*(instr->chars + i) == '\r')
708 {
709 *scannedText++ = '\\';
710 *scannedText++ = 'r';
711 }
712 else if (!isprint(*(instr->chars +i)))
713 {
714 *scannedText++ = '?';
715 }
716 else
717 {
718 *scannedText++ = *(instr->chars + i);
719 }
720 }
721 *scannedText = '\0';
722
723 string->len = (ANTLR3_UINT32)(scannedText - string->chars);
724
725 return string;
726 }
727
728 static pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)729 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
730 {
731 pANTLR3_STRING string;
732
733 /* We don't need to be too efficient here, this is mostly for error messages and so on.
734 */
735 pANTLR3_UINT16 scannedText;
736 pANTLR3_UINT16 inText;
737 ANTLR3_UINT32 i;
738 ANTLR3_UINT32 outLen;
739
740 /* Assume we need as much as twice as much space to parse out the control characters
741 */
742 string = factory->newSize(factory, instr->len *2 + 1);
743
744 /* Scan through and replace unprintable (in terms of this routine)
745 * characters
746 */
747 scannedText = (pANTLR3_UINT16)(string->chars);
748 inText = (pANTLR3_UINT16)(instr->chars);
749 outLen = 0;
750
751 for (i = 0; i < instr->len; i++)
752 {
753 if (*(inText + i) == '\n')
754 {
755 *scannedText++ = '\\';
756 *scannedText++ = 'n';
757 outLen += 2;
758 }
759 else if (*(inText + i) == '\r')
760 {
761 *scannedText++ = '\\';
762 *scannedText++ = 'r';
763 outLen += 2;
764 }
765 else if (!isprint(*(inText +i)))
766 {
767 *scannedText++ = '?';
768 outLen++;
769 }
770 else
771 {
772 *scannedText++ = *(inText + i);
773 outLen++;
774 }
775 }
776 *scannedText = '\0';
777
778 string->len = outLen;
779
780 return string;
781 }
782
783 /** Fascist Capitalist Pig function created
784 * to oppress the workers comrade.
785 */
786 static void
closeFactory(pANTLR3_STRING_FACTORY factory)787 closeFactory (pANTLR3_STRING_FACTORY factory)
788 {
789 /* Delete the vector we were tracking the strings with, this will
790 * causes all the allocated strings to be deallocated too
791 */
792 factory->strings->free(factory->strings);
793
794 /* Delete the space for the factory itself
795 */
796 ANTLR3_FREE((void *)factory);
797 }
798
799 static pANTLR3_UINT8
append8(pANTLR3_STRING string,const char * newbit)800 append8 (pANTLR3_STRING string, const char * newbit)
801 {
802 ANTLR3_UINT32 len;
803
804 len = (ANTLR3_UINT32)strlen(newbit);
805
806 if (string->size < (string->len + len + 1))
807 {
808 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
809 if (newAlloc == NULL)
810 {
811 return NULL;
812 }
813 string->chars = newAlloc;
814 string->size = string->len + len + 1;
815 }
816
817 /* Note we copy one more byte than the strlen in order to get the trailing
818 */
819 ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
820 string->len += len;
821
822 return string->chars;
823 }
824
825 static pANTLR3_UINT8
appendUTF16_8(pANTLR3_STRING string,const char * newbit)826 appendUTF16_8 (pANTLR3_STRING string, const char * newbit)
827 {
828 ANTLR3_UINT32 len;
829 pANTLR3_UINT16 apPoint;
830 ANTLR3_UINT32 count;
831
832 len = (ANTLR3_UINT32)strlen(newbit);
833
834 if (string->size < (string->len + len + 1))
835 {
836 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
837 if (newAlloc == NULL)
838 {
839 return NULL;
840 }
841 string->chars = newAlloc;
842 string->size = string->len + len + 1;
843 }
844
845 apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
846 string->len += len;
847
848 for (count = 0; count < len; count++)
849 {
850 *apPoint++ = *(newbit + count);
851 }
852 *apPoint = '\0';
853
854 return string->chars;
855 }
856
857 static pANTLR3_UINT8
appendUTF16_UTF16(pANTLR3_STRING string,const char * newbit)858 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit)
859 {
860 ANTLR3_UINT32 len;
861 pANTLR3_UINT16 in;
862
863 /** First, determine the length of the input string
864 */
865 in = (pANTLR3_UINT16)newbit;
866 len = 0;
867
868 while (*in++ != '\0')
869 {
870 len++;
871 }
872
873 if (string->size < (string->len + len + 1))
874 {
875 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
876 if (newAlloc == NULL)
877 {
878 return NULL;
879 }
880 string->chars = newAlloc;
881 string->size = string->len + len + 1;
882 }
883
884 /* Note we copy one more byte than the strlen in order to get the trailing delimiter
885 */
886 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
887 string->len += len;
888
889 return string->chars;
890 }
891
892 static pANTLR3_UINT8
set8(pANTLR3_STRING string,const char * chars)893 set8 (pANTLR3_STRING string, const char * chars)
894 {
895 ANTLR3_UINT32 len;
896
897 len = (ANTLR3_UINT32)strlen(chars);
898 if (string->size < len + 1)
899 {
900 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
901 if (newAlloc == NULL)
902 {
903 return NULL;
904 }
905 string->chars = newAlloc;
906 string->size = len + 1;
907 }
908
909 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
910 */
911 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
912 string->len = len;
913
914 return string->chars;
915
916 }
917
918 static pANTLR3_UINT8
setUTF16_8(pANTLR3_STRING string,const char * chars)919 setUTF16_8 (pANTLR3_STRING string, const char * chars)
920 {
921 ANTLR3_UINT32 len;
922 ANTLR3_UINT32 count;
923 pANTLR3_UINT16 apPoint;
924
925 len = (ANTLR3_UINT32)strlen(chars);
926 if (string->size < len + 1)
927 {
928 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
929 if (newAlloc == NULL)
930 {
931 return NULL;
932 }
933 string->chars = newAlloc;
934 string->size = len + 1;
935 }
936 apPoint = ((pANTLR3_UINT16)string->chars);
937 string->len = len;
938
939 for (count = 0; count < string->len; count++)
940 {
941 *apPoint++ = *(chars + count);
942 }
943 *apPoint = '\0';
944
945 return string->chars;
946 }
947
948 static pANTLR3_UINT8
setUTF16_UTF16(pANTLR3_STRING string,const char * chars)949 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars)
950 {
951 ANTLR3_UINT32 len;
952 pANTLR3_UINT16 in;
953
954 /** First, determine the length of the input string
955 */
956 in = (pANTLR3_UINT16)chars;
957 len = 0;
958
959 while (*in++ != '\0')
960 {
961 len++;
962 }
963
964 if (string->size < len + 1)
965 {
966 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
967 if (newAlloc == NULL)
968 {
969 return NULL;
970 }
971 string->chars = newAlloc;
972 string->size = len + 1;
973 }
974
975 /* Note we copy one more byte than the strlen in order to get the trailing '\0'
976 */
977 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
978 string->len = len;
979
980 return string->chars;
981
982 }
983
984 static pANTLR3_UINT8
addc8(pANTLR3_STRING string,ANTLR3_UINT32 c)985 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
986 {
987 if (string->size < string->len + 2)
988 {
989 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
990 if (newAlloc == NULL)
991 {
992 return NULL;
993 }
994 string->chars = newAlloc;
995 string->size = string->len + 2;
996 }
997 *(string->chars + string->len) = (ANTLR3_UINT8)c;
998 *(string->chars + string->len + 1) = '\0';
999 string->len++;
1000
1001 return string->chars;
1002 }
1003
1004 static pANTLR3_UINT8
addcUTF16(pANTLR3_STRING string,ANTLR3_UINT32 c)1005 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
1006 {
1007 pANTLR3_UINT16 ptr;
1008
1009 if (string->size < string->len + 2)
1010 {
1011 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
1012 if (newAlloc == NULL)
1013 {
1014 return NULL;
1015 }
1016 string->chars = newAlloc;
1017 string->size = string->len + 2;
1018 }
1019 ptr = (pANTLR3_UINT16)(string->chars);
1020
1021 *(ptr + string->len) = (ANTLR3_UINT16)c;
1022 *(ptr + string->len + 1) = '\0';
1023 string->len++;
1024
1025 return string->chars;
1026 }
1027
1028 static pANTLR3_UINT8
addi8(pANTLR3_STRING string,ANTLR3_INT32 i)1029 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
1030 {
1031 ANTLR3_UINT8 newbit[32];
1032
1033 sprintf((char *)newbit, "%d", i);
1034
1035 return string->append8(string, (const char *)newbit);
1036 }
1037 static pANTLR3_UINT8
addiUTF16(pANTLR3_STRING string,ANTLR3_INT32 i)1038 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i)
1039 {
1040 ANTLR3_UINT8 newbit[32];
1041
1042 sprintf((char *)newbit, "%d", i);
1043
1044 return string->append8(string, (const char *)newbit);
1045 }
1046
1047 static pANTLR3_UINT8
inserti8(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1048 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1049 {
1050 ANTLR3_UINT8 newbit[32];
1051
1052 sprintf((char *)newbit, "%d", i);
1053 return string->insert8(string, point, (const char *)newbit);
1054 }
1055 static pANTLR3_UINT8
insertiUTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1056 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1057 {
1058 ANTLR3_UINT8 newbit[32];
1059
1060 sprintf((char *)newbit, "%d", i);
1061 return string->insert8(string, point, (const char *)newbit);
1062 }
1063
1064 static pANTLR3_UINT8
insert8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1065 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1066 {
1067 ANTLR3_UINT32 len;
1068
1069 if (point >= string->len)
1070 {
1071 return string->append(string, newbit);
1072 }
1073
1074 len = (ANTLR3_UINT32)strlen(newbit);
1075
1076 if (len == 0)
1077 {
1078 return string->chars;
1079 }
1080
1081 if (string->size < (string->len + len + 1))
1082 {
1083 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1084 if (newAlloc == NULL)
1085 {
1086 return NULL;
1087 }
1088 string->chars = newAlloc;
1089 string->size = string->len + len + 1;
1090 }
1091
1092 /* Move the characters we are inserting before, including the delimiter
1093 */
1094 ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1095
1096 /* Note we copy the exact number of bytes
1097 */
1098 ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1099
1100 string->len += len;
1101
1102 return string->chars;
1103 }
1104
1105 static pANTLR3_UINT8
insertUTF16_8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1106 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1107 {
1108 ANTLR3_UINT32 len;
1109 ANTLR3_UINT32 count;
1110 pANTLR3_UINT16 inPoint;
1111
1112 if (point >= string->len)
1113 {
1114 return string->append8(string, newbit);
1115 }
1116
1117 len = (ANTLR3_UINT32)strlen(newbit);
1118
1119 if (len == 0)
1120 {
1121 return string->chars;
1122 }
1123
1124 if (string->size < (string->len + len + 1))
1125 {
1126 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1127 if (newAlloc == NULL)
1128 {
1129 return NULL;
1130 }
1131 string->chars = newAlloc;
1132 string->size = string->len + len + 1;
1133 }
1134
1135 /* Move the characters we are inserting before, including the delimiter
1136 */
1137 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1138
1139 string->len += len;
1140
1141 inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1142 for (count = 0; count<len; count++)
1143 {
1144 *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1145 }
1146
1147 return string->chars;
1148 }
1149
1150 static pANTLR3_UINT8
insertUTF16_UTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1151 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1152 {
1153 ANTLR3_UINT32 len;
1154 pANTLR3_UINT16 in;
1155
1156 if (point >= string->len)
1157 {
1158 return string->append(string, newbit);
1159 }
1160
1161 /** First, determine the length of the input string
1162 */
1163 in = (pANTLR3_UINT16)newbit;
1164 len = 0;
1165
1166 while (*in++ != '\0')
1167 {
1168 len++;
1169 }
1170
1171 if (len == 0)
1172 {
1173 return string->chars;
1174 }
1175
1176 if (string->size < (string->len + len + 1))
1177 {
1178 pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1179 if (newAlloc == NULL)
1180 {
1181 return NULL;
1182 }
1183 string->chars = newAlloc;
1184 string->size = string->len + len + 1;
1185 }
1186
1187 /* Move the characters we are inserting before, including the delimiter
1188 */
1189 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1190
1191
1192 /* Note we copy the exact number of characters
1193 */
1194 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1195
1196 string->len += len;
1197
1198 return string->chars;
1199 }
1200
setS(pANTLR3_STRING string,pANTLR3_STRING chars)1201 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
1202 {
1203 return string->set(string, (const char *)(chars->chars));
1204 }
1205
appendS(pANTLR3_STRING string,pANTLR3_STRING newbit)1206 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
1207 {
1208 /* We may be passed an empty string, in which case we just return the current pointer
1209 */
1210 if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1211 {
1212 return string->chars;
1213 }
1214 else
1215 {
1216 return string->append(string, (const char *)(newbit->chars));
1217 }
1218 }
1219
insertS(pANTLR3_STRING string,ANTLR3_UINT32 point,pANTLR3_STRING newbit)1220 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1221 {
1222 return string->insert(string, point, (const char *)(newbit->chars));
1223 }
1224
1225 /* Function that compares the text of a string to the supplied
1226 * 8 bit character string and returns a result a la strcmp()
1227 */
1228 static ANTLR3_UINT32
compare8(pANTLR3_STRING string,const char * compStr)1229 compare8 (pANTLR3_STRING string, const char * compStr)
1230 {
1231 return strcmp((const char *)(string->chars), compStr);
1232 }
1233
1234 /* Function that compares the text of a string with the supplied character string
1235 * (which is assumed to be in the same encoding as the string itself) and returns a result
1236 * a la strcmp()
1237 */
1238 static ANTLR3_UINT32
compareUTF16_8(pANTLR3_STRING string,const char * compStr)1239 compareUTF16_8 (pANTLR3_STRING string, const char * compStr)
1240 {
1241 pANTLR3_UINT16 ourString;
1242 ANTLR3_UINT32 charDiff;
1243
1244 ourString = (pANTLR3_UINT16)(string->chars);
1245
1246 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1247 {
1248 charDiff = *ourString - *compStr;
1249 if (charDiff != 0)
1250 {
1251 return charDiff;
1252 }
1253 ourString++;
1254 compStr++;
1255 }
1256
1257 /* At this point, one of the strings was terminated
1258 */
1259 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1260
1261 }
1262
1263 /* Function that compares the text of a string with the supplied character string
1264 * (which is assumed to be in the same encoding as the string itself) and returns a result
1265 * a la strcmp()
1266 */
1267 static ANTLR3_UINT32
compareUTF16_UTF16(pANTLR3_STRING string,const char * compStr8)1268 compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8)
1269 {
1270 pANTLR3_UINT16 ourString;
1271 pANTLR3_UINT16 compStr;
1272 ANTLR3_UINT32 charDiff;
1273
1274 ourString = (pANTLR3_UINT16)(string->chars);
1275 compStr = (pANTLR3_UINT16)(compStr8);
1276
1277 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1278 {
1279 charDiff = *ourString - *compStr;
1280 if (charDiff != 0)
1281 {
1282 return charDiff;
1283 }
1284 ourString++;
1285 compStr++;
1286 }
1287
1288 /* At this point, one of the strings was terminated
1289 */
1290 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1291 }
1292
1293 /* Function that compares the text of a string with the supplied string
1294 * (which is assumed to be in the same encoding as the string itself) and returns a result
1295 * a la strcmp()
1296 */
1297 static ANTLR3_UINT32
compareS(pANTLR3_STRING string,pANTLR3_STRING compStr)1298 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
1299 {
1300 return string->compare(string, (const char *)compStr->chars);
1301 }
1302
1303
1304 /* Function that returns the character indexed at the supplied
1305 * offset as a 32 bit character.
1306 */
1307 static ANTLR3_UCHAR
charAt8(pANTLR3_STRING string,ANTLR3_UINT32 offset)1308 charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1309 {
1310 if (offset > string->len)
1311 {
1312 return (ANTLR3_UCHAR)'\0';
1313 }
1314 else
1315 {
1316 return (ANTLR3_UCHAR)(*(string->chars + offset));
1317 }
1318 }
1319
1320 /* Function that returns the character indexed at the supplied
1321 * offset as a 32 bit character.
1322 */
1323 static ANTLR3_UCHAR
charAtUTF16(pANTLR3_STRING string,ANTLR3_UINT32 offset)1324 charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1325 {
1326 if (offset > string->len)
1327 {
1328 return (ANTLR3_UCHAR)'\0';
1329 }
1330 else
1331 {
1332 return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1333 }
1334 }
1335
1336 /* Function that returns a substring of the supplied string a la .subString(s,e)
1337 * in java runtimes.
1338 */
1339 static pANTLR3_STRING
subString8(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1340 subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1341 {
1342 pANTLR3_STRING newStr;
1343
1344 if (endIndex > string->len)
1345 {
1346 endIndex = string->len + 1;
1347 }
1348 newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1349
1350 return newStr;
1351 }
1352
1353 /* Returns a substring of the supplied string a la .subString(s,e)
1354 * in java runtimes.
1355 */
1356 static pANTLR3_STRING
subStringUTF16(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1357 subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1358 {
1359 pANTLR3_STRING newStr;
1360
1361 if (endIndex > string->len)
1362 {
1363 endIndex = string->len + 1;
1364 }
1365 newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1366
1367 return newStr;
1368 }
1369
1370 /* Function that can convert the characters in the string to an integer
1371 */
1372 static ANTLR3_INT32
toInt32_8(struct ANTLR3_STRING_struct * string)1373 toInt32_8 (struct ANTLR3_STRING_struct * string)
1374 {
1375 return atoi((const char *)(string->chars));
1376 }
1377
1378 /* Function that can convert the characters in the string to an integer
1379 */
1380 static ANTLR3_INT32
toInt32_UTF16(struct ANTLR3_STRING_struct * string)1381 toInt32_UTF16 (struct ANTLR3_STRING_struct * string)
1382 {
1383 pANTLR3_UINT16 input;
1384 ANTLR3_INT32 value;
1385 ANTLR3_BOOLEAN negate;
1386
1387 value = 0;
1388 input = (pANTLR3_UINT16)(string->chars);
1389 negate = ANTLR3_FALSE;
1390
1391 if (*input == (ANTLR3_UCHAR)'-')
1392 {
1393 negate = ANTLR3_TRUE;
1394 input++;
1395 }
1396 else if (*input == (ANTLR3_UCHAR)'+')
1397 {
1398 input++;
1399 }
1400
1401 while (*input != '\0' && isdigit(*input))
1402 {
1403 value = value * 10;
1404 value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1405 input++;
1406 }
1407
1408 return negate ? -value : value;
1409 }
1410
1411 /* Function that returns a pointer to an 8 bit version of the string,
1412 * which in this case is just the string as this is
1413 * 8 bit encodiing anyway.
1414 */
to8_8(pANTLR3_STRING string)1415 static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
1416 {
1417 return string;
1418 }
1419
1420 /* Function that returns an 8 bit version of the string,
1421 * which in this case is returning all the UTF16 characters
1422 * narrowed back into 8 bits, with characters that are too large
1423 * replaced with '_'
1424 */
to8_UTF16(pANTLR3_STRING string)1425 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string)
1426 {
1427 pANTLR3_STRING newStr;
1428 ANTLR3_UINT32 i;
1429
1430 /* Create a new 8 bit string
1431 */
1432 newStr = newRaw8(string->factory);
1433
1434 if (newStr == NULL)
1435 {
1436 return NULL;
1437 }
1438
1439 /* Always add one more byte for a terminator
1440 */
1441 newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1442 if (newStr->chars != NULL)
1443 {
1444 newStr->size = string->len + 1;
1445 newStr->len = string->len;
1446
1447 /* Now copy each UTF16 charActer , making it an 8 bit character of
1448 * some sort.
1449 */
1450 for (i=0; i<string->len; i++)
1451 {
1452 ANTLR3_UCHAR c;
1453
1454 c = *(((pANTLR3_UINT16)(string->chars)) + i);
1455
1456 *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1457 }
1458
1459 /* Terminate
1460 */
1461 *(newStr->chars + newStr->len) = '\0';
1462 }
1463
1464 return newStr;
1465 }
1466