• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /** \file
2  * Simple string interface allows indiscriminate allocation of strings
3  * such that they can be allocated all over the place and released in
4  * one chunk via a string factory - saves lots of hassle in remembering what
5  * strings were allocated where.
6  */
7 #ifndef	_ANTLR3_STRING_H
8 #define	_ANTLR3_STRING_H
9 
10 // [The "BSD licence"]
11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
12 // http://www.temporal-wave.com
13 // http://www.linkedin.com/in/jimidle
14 //
15 // All rights reserved.
16 //
17 // Redistribution and use in source and binary forms, with or without
18 // modification, are permitted provided that the following conditions
19 // are met:
20 // 1. Redistributions of source code must retain the above copyright
21 //    notice, this list of conditions and the following disclaimer.
22 // 2. Redistributions in binary form must reproduce the above copyright
23 //    notice, this list of conditions and the following disclaimer in the
24 //    documentation and/or other materials provided with the distribution.
25 // 3. The name of the author may not be used to endorse or promote products
26 //    derived from this software without specific prior written permission.
27 //
28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 
39 #include    <antlr3defs.h>
40 #include    <antlr3collections.h>
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
46 /** Base string class tracks the allocations and provides simple string
47  *  tracking functions. Mostly you can work directly on the string for things
48  *  that don't reallocate it, like strchr() etc. Perhaps someone will want to provide implementations for UTF8
49  *  and so on.
50  */
51 typedef	struct ANTLR3_STRING_struct
52 {
53 
54     /** The factory that created this string.
55      */
56     pANTLR3_STRING_FACTORY	factory;
57 
58     /** Pointer to the current string value (starts at NULL unless
59      *  the string allocator is told to create it with a pre-known size).
60      */
61     pANTLR3_UINT8		chars;
62 
63     /** Current length of the string up to, but not including, the trailing '\0'.
64      *  Note that the actual allocation (->size)
65      *  is always at least one byte more than this to accommodate the trailing '\0'.
66      */
67     ANTLR3_UINT32		len;
68 
69     /** Current size of the string in bytes, including the trailing '\0'.
70      */
71     ANTLR3_UINT32		size;
72 
73     /** Index of the string (allocation number) in case someone wants
74      *  to explicitly release it.
75      */
76     ANTLR3_UINT32		index;
77 
78     /** Occasionally it is useful to know what the encoding of the string
79      *  actually is, hence it is stored here as one of the ANTLR3_ENCODING_ values.
80      */
81     ANTLR3_UINT8		encoding;
82 
83     /** Pointer to function that sets the string value to a specific string in the default encoding
84      *  for this string. For instance, if this is 8 bit, then this function is the same as set8,
85      *  but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters, not
86      *  8 bit.
87      */
88     pANTLR3_UINT8   (*set)	(struct ANTLR3_STRING_struct * string, const char * chars);
89 
90     /** Pointer to function that sets the string value from a specific 8 bit string, storing it in the default
91      *  encoding for this string. For instance, if this is an 8 bit string, then this function is the same as set,
92      *  but if the encoding is UTF16, then the pointer is assumed to point to 8 bit characters that must
93      *  be converted to UTF16 characters on the fly.
94      */
95     pANTLR3_UINT8   (*set8)	(struct ANTLR3_STRING_struct * string, const char * chars);
96 
97     /** Pointer to function that adds (appends) the characters at a raw char * pointer, given in the default
98      *  encoding for this string. For instance, if this is 8 bit, then this function is the same as append8,
99      *  but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters, not
100      *  8 bit.
101      */
102     pANTLR3_UINT8   (*append)	(struct ANTLR3_STRING_struct * string, const char * newbit);
103 
104     /** Pointer to function that adds (appends) the 8 bit characters at a raw char * pointer, converting them
105      *  to the default encoding for this string. For instance, if this is a UTF16 string, then this function
106      *  assumes the pointer points to 8 bit characters that must be converted on the fly.
107      */
108     pANTLR3_UINT8   (*append8)	(struct ANTLR3_STRING_struct * string, const char * newbit);
109 
110     /** Pointer to function that inserts the supplied string at the specified
111      *  offset in the current string in the default encoding for this string. For instance, if this is an 8
112      *  bit string, then this is the same as insert8, but if this is a UTF16 string, then the pointer
113      *  must point to UTF16 characters.
114      */
115     pANTLR3_UINT8   (*insert)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
116 
117     /** Pointer to function that inserts the supplied 8 bit string at the specified
118      *  offset in the current string, in the default encoding for this string. For instance, if this is a UTF16
119      *  string, then the pointer is assumed to point at 8 bit characters that must be converted on the fly.
120      */
121     pANTLR3_UINT8   (*insert8)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
122 
123     /** Pointer to function that sets the string value to a copy of the supplied string (strings must be in the
124      *  same encoding).
125      */
126     pANTLR3_UINT8   (*setS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * chars);
127 
128     /** Pointer to function that appends a copy of the characters contained in another string. Strings must be
129      *  in the same encoding.
130      */
131     pANTLR3_UINT8   (*appendS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * newbit);
132 
133     /** Pointer to function that inserts a copy of the characters in the supplied string at the specified
134      *  offset in the current string. Strings must be in the same encoding.
135      */
136     pANTLR3_UINT8   (*insertS)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, struct ANTLR3_STRING_struct * newbit);
137 
138     /** Pointer to function that inserts the supplied integer, in string form, at the specified
139      *  offset in the current string.
140      */
141     pANTLR3_UINT8   (*inserti)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
142 
143     /** Pointer to function that adds a single character to the end of the string, in the encoding of the
144      *  string - 8 bit, UTF16, UTF8 etc. Input is a single UTF32 (32 bits wide integer) character.
145      */
146     pANTLR3_UINT8   (*addc)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 c);
147 
148     /** Pointer to function that adds the stringified representation of an integer
149      *  to the string.
150      */
151     pANTLR3_UINT8   (*addi)	(struct ANTLR3_STRING_struct * string, ANTLR3_INT32 i);
152 
153     /** Pointer to function that compares the text of a string to the supplied
154      *  8 bit character string and returns a result a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32; if that is unsigned, a "less than" result cannot be negative - confirm how callers should test it.
155      */
156     ANTLR3_UINT32   (*compare8)	(struct ANTLR3_STRING_struct * string, const char * compStr);
157 
158     /** Pointer to a function that compares the text of a string with the supplied character string
159      *  (which is assumed to be in the same encoding as the string itself) and returns a result
160      *  a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32 - confirm how "less than" is reported.
161      */
162     ANTLR3_UINT32   (*compare)	(struct ANTLR3_STRING_struct * string, const char * compStr);
163 
164     /** Pointer to a function that compares the text of a string with the supplied string
165      *  (which is assumed to be in the same encoding as the string itself) and returns a result
166      *  a la strcmp(). NOTE(review): the declared return type is ANTLR3_UINT32 - confirm how "less than" is reported.
167      */
168     ANTLR3_UINT32   (*compareS)	(struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * compStr);
169 
170     /** Pointer to a function that returns the character indexed at the supplied
171      *  offset as a 32 bit character.
172      */
173     ANTLR3_UCHAR    (*charAt)	(struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 offset);
174 
175     /** Pointer to a function that returns a substring of the supplied string, a la .subString(s,e)
176      *  in the Java language.
177      */
178     struct ANTLR3_STRING_struct *
179 					(*subString)    (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
180 
181     /** Pointer to a function that returns the integer representation of any numeric characters
182      *  at the beginning of the string.
183      */
184     ANTLR3_INT32	(*toInt32)	    (struct ANTLR3_STRING_struct * string);
185 
186     /** Pointer to a function that yields an 8 bit string regardless of the encoding of the supplied
187      *  string. This is useful when you want to use the text of a token in some way that requires an 8 bit
188      *  value, such as the key for a hashtable. The function is required to produce a usable string even
189      *  if the text given as input has characters that do not fit in 8 bit space; it will replace them
190      *  with some arbitrary character such as '?'.
191      */
192     struct ANTLR3_STRING_struct *
193 					(*to8)	    (struct ANTLR3_STRING_struct * string);
194 
195 	/// Pointer to a function that yields a UTF8 encoded string of the current string,
196 	/// regardless of the current encoding of the string. Because there is currently no UTF8
197 	/// handling in the string class, the string it creates is useful only for read only
198 	/// applications, as it will not contain methods that deal with UTF8 at the moment.
199 	///
200 	struct ANTLR3_STRING_struct *
201 					(*toUTF8)	(struct ANTLR3_STRING_struct * string);
202 
203 }
204     ANTLR3_STRING;
205 
206 /** Definition of the string factory interface, which creates and tracks
207  *  strings for you of various shapes and sizes.
208  */
209 typedef struct	ANTLR3_STRING_FACTORY_struct
210 {
211     /** List of all the strings that have been allocated by the factory.
212      */
213     pANTLR3_VECTOR    strings;
214 
215     /** Index of the next string that we allocate.
216      */
217     ANTLR3_UINT32   index;
218 
219     /** Pointer to function that manufactures an empty string.
220      */
221     pANTLR3_STRING  (*newRaw)	(struct ANTLR3_STRING_FACTORY_struct * factory);
222 
223     /** Pointer to function that manufactures a raw string with no text in it but space for size
224      *  characters (size being the second parameter).
225      */
226     pANTLR3_STRING  (*newSize)	(struct ANTLR3_STRING_FACTORY_struct * factory, ANTLR3_UINT32 size);
227 
228     /** Pointer to function that manufactures a string from a given pointer and length (size). The pointer is
229      *  assumed to point to characters in the same encoding as the string type, hence if this is a UTF16 string
230      *  the pointer should point to UTF16 characters.
231      */
232     pANTLR3_STRING  (*newPtr)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
233 
234     /** Pointer to function that manufactures a string from a given pointer and length (size). The pointer is
235      *  assumed to point at 8 bit characters which must be converted on the fly to the encoding of the actual string.
236      */
237     pANTLR3_STRING  (*newPtr8)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
238 
239     /** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer is
240      *  assumed to point to characters in the same encoding as the string itself, i.e. UTF16 if a UTF16
241      *  string and so on.
242      */
243     pANTLR3_STRING  (*newStr)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
244 
245     /** Pointer to function that manufactures a string from a given pointer only (no length is passed, so it is
246      *  presumably worked out as for newStr - confirm against the implementation). The pointer should point to
247      *  8 bit characters regardless of the actual encoding of the string. The 8 bit characters will be converted to the actual string encoding on the fly.
248      */
249     pANTLR3_STRING  (*newStr8)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
250 
251     /** Pointer to function that deletes the string altogether.
252      */
253     void	    (*destroy)	(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
254 
255     /** Pointer to function that returns a copy of the string in printable form without any control
256      *  characters in it.
257      */
258     pANTLR3_STRING  (*printable)(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
259 
260     /** Pointer to function that closes the factory.
261      */
262     void	    (*close)	(struct ANTLR3_STRING_FACTORY_struct * factory);
263 
264 }
265     ANTLR3_STRING_FACTORY;
266 
267 #ifdef __cplusplus
268 }
269 #endif
270 
271 #endif
272 
273