1 /** \file 2 * Simple string interface allows indiscriminate allocation of strings 3 * such that they can be allocated all over the place and released in 4 * one chunk via a string factory - saves lots of hassle in remembering what 5 * strings were allocated where. 6 */ 7 #ifndef _ANTLR3_STRING_H 8 #define _ANTLR3_STRING_H 9 10 // [The "BSD licence"] 11 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 12 // http://www.temporal-wave.com 13 // http://www.linkedin.com/in/jimidle 14 // 15 // All rights reserved. 16 // 17 // Redistribution and use in source and binary forms, with or without 18 // modification, are permitted provided that the following conditions 19 // are met: 20 // 1. Redistributions of source code must retain the above copyright 21 // notice, this list of conditions and the following disclaimer. 22 // 2. Redistributions in binary form must reproduce the above copyright 23 // notice, this list of conditions and the following disclaimer in the 24 // documentation and/or other materials provided with the distribution. 25 // 3. The name of the author may not be used to endorse or promote products 26 // derived from this software without specific prior written permission. 27 // 28 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 29 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 30 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 31 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 32 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 33 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 37 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 39 #include <antlr3defs.h> 40 #include <antlr3collections.h> 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 /** Base string class tracks the allocations and provides simple string 47 * tracking functions. Mostly you can work directly on the string for things 48 * that don't reallocate it, like strchr() etc. Perhaps someone will want to provide implementations for UTF8 49 * and so on. 50 */ 51 typedef struct ANTLR3_STRING_struct 52 { 53 54 /** The factory that created this string 55 */ 56 pANTLR3_STRING_FACTORY factory; 57 58 /** Pointer to the current string value (starts at NULL unless 59 * the string allocator is told to create it with a pre known size. 60 */ 61 pANTLR3_UINT8 chars; 62 63 /** Current length of the string up to and not including, the trailing '\0' 64 * Note that the actual allocation (->size) 65 * is always at least one byte more than this to accommodate trailing '\0' 66 */ 67 ANTLR3_UINT32 len; 68 69 /** Current size of the string in bytes including the trailing '\0' 70 */ 71 ANTLR3_UINT32 size; 72 73 /** Index of string (allocation number) in case someone wants 74 * to explicitly release it. 75 */ 76 ANTLR3_UINT32 index; 77 78 /** Occasionally it is useful to know what the encoding of the string 79 * actually is, hence it is stored here as one the ANTLR3_ENCODING_ values 80 */ 81 ANTLR3_UINT8 encoding; 82 83 /** Pointer to function that sets the string value to a specific string in the default encoding 84 * for this string. For instance, if this is 8 bit, then this function is the same as set8 85 * but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters, not 86 * 8 bit. 87 */ 88 pANTLR3_UINT8 (*set) (struct ANTLR3_STRING_struct * string, const char * chars); 89 90 /** Pointer to function that sets the string value to a specific 8 bit string in the default encoding 91 * for this string. For instance, if this is an 8 bit string, then this function is the same as set8 92 * but if the encoding is UTF16, then the pointer is assumed to point to 8 bit characters that must 93 * be converted to UTF16 characters on the fly. 94 */ 95 pANTLR3_UINT8 (*set8) (struct ANTLR3_STRING_struct * string, const char * chars); 96 97 /** Pointer to function adds a raw char * type pointer in the default encoding 98 * for this string. For instance, if this is 8 bit, then this function is the same as append8 99 * but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters not 100 * 8 bit. 101 */ 102 pANTLR3_UINT8 (*append) (struct ANTLR3_STRING_struct * string, const char * newbit); 103 104 /** Pointer to function adds a raw char * type pointer in the default encoding 105 * for this string. For instance, if this is a UTF16 string, then this function assumes the pointer 106 * points to 8 bit characters that must be converted on the fly. 107 */ 108 pANTLR3_UINT8 (*append8) (struct ANTLR3_STRING_struct * string, const char * newbit); 109 110 /** Pointer to function that inserts the supplied string at the specified 111 * offset in the current string in the default encoding for this string. For instance, if this is an 8 112 * bit string, then this is the same as insert8, but if this is a UTF16 string, then the pointer 113 * must point to UTF16 characters. 114 */ 115 pANTLR3_UINT8 (*insert) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit); 116 117 /** Pointer to function that inserts the supplied string at the specified 118 * offset in the current string in the default encoding for this string. For instance, if this is a UTF16 string 119 * then the pointer is assumed to point at 8 bit characteres that must be converted on the fly. 120 */ 121 pANTLR3_UINT8 (*insert8) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit); 122 123 /** Pointer to function that sets the string value to a copy of the supplied string (strings must be in the 124 * same encoding. 125 */ 126 pANTLR3_UINT8 (*setS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * chars); 127 128 /** Pointer to function appends a copy of the characters contained in another string. Strings must be in the 129 * same encoding. 130 */ 131 pANTLR3_UINT8 (*appendS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * newbit); 132 133 /** Pointer to function that inserts a copy of the characters in the supplied string at the specified 134 * offset in the current string. strings must be in the same encoding. 135 */ 136 pANTLR3_UINT8 (*insertS) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, struct ANTLR3_STRING_struct * newbit); 137 138 /** Pointer to function that inserts the supplied integer in string form at the specified 139 * offset in the current string. 140 */ 141 pANTLR3_UINT8 (*inserti) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, ANTLR3_INT32 i); 142 143 /** Pointer to function that adds a single character to the end of the string, in the encoding of the 144 * string - 8 bit, UTF16, utf-8 etc. Input is a single UTF32 (32 bits wide integer) character. 145 */ 146 pANTLR3_UINT8 (*addc) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 c); 147 148 /** Pointer to function that adds the stringified representation of an integer 149 * to the string. 150 */ 151 pANTLR3_UINT8 (*addi) (struct ANTLR3_STRING_struct * string, ANTLR3_INT32 i); 152 153 /** Pointer to function that compares the text of a string to the supplied 154 * 8 bit character string and returns a result a la strcmp() 155 */ 156 ANTLR3_UINT32 (*compare8) (struct ANTLR3_STRING_struct * string, const char * compStr); 157 158 /** Pointer to a function that compares the text of a string with the supplied character string 159 * (which is assumed to be in the same encoding as the string itself) and returns a result 160 * a la strcmp() 161 */ 162 ANTLR3_UINT32 (*compare) (struct ANTLR3_STRING_struct * string, const char * compStr); 163 164 /** Pointer to a function that compares the text of a string with the supplied string 165 * (which is assumed to be in the same encoding as the string itself) and returns a result 166 * a la strcmp() 167 */ 168 ANTLR3_UINT32 (*compareS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * compStr); 169 170 /** Pointer to a function that returns the character indexed at the supplied 171 * offset as a 32 bit character. 172 */ 173 ANTLR3_UCHAR (*charAt) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 offset); 174 175 /** Pointer to a function that returns a substring of the supplied string a la .subString(s,e) 176 * in the Java language. 177 */ 178 struct ANTLR3_STRING_struct * 179 (*subString) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); 180 181 /** Pointer to a function that returns the integer representation of any numeric characters 182 * at the beginning of the string 183 */ 184 ANTLR3_INT32 (*toInt32) (struct ANTLR3_STRING_struct * string); 185 186 /** Pointer to a function that yields an 8 bit string regardless of the encoding of the supplied 187 * string. This is useful when you want to use the text of a token in some way that requires an 8 bit 188 * value, such as the key for a hashtable. The function is required to produce a usable string even 189 * if the text given as input has characters that do not fit in 8 bit space, it will replace them 190 * with some arbitrary character such as '?' 191 */ 192 struct ANTLR3_STRING_struct * 193 (*to8) (struct ANTLR3_STRING_struct * string); 194 195 /// Pointer to a function that yields a UT8 encoded string of the current string, 196 /// regardless of the current encoding of the string. Because there is currently no UTF8 197 /// handling in the string class, it creates therefore, a string that is useful only for read only 198 /// applications as it will not contain methods that deal with UTF8 at the moment. 199 /// 200 struct ANTLR3_STRING_struct * 201 (*toUTF8) (struct ANTLR3_STRING_struct * string); 202 203 } 204 ANTLR3_STRING; 205 206 /** Definition of the string factory interface, which creates and tracks 207 * strings for you of various shapes and sizes. 208 */ 209 typedef struct ANTLR3_STRING_FACTORY_struct 210 { 211 /** List of all the strings that have been allocated by the factory 212 */ 213 pANTLR3_VECTOR strings; 214 215 /* Index of next string that we allocate 216 */ 217 ANTLR3_UINT32 index; 218 219 /** Pointer to function that manufactures an empty string 220 */ 221 pANTLR3_STRING (*newRaw) (struct ANTLR3_STRING_FACTORY_struct * factory); 222 223 /** Pointer to function that manufactures a raw string with no text in it but space for size 224 * characters. 225 */ 226 pANTLR3_STRING (*newSize) (struct ANTLR3_STRING_FACTORY_struct * factory, ANTLR3_UINT32 size); 227 228 /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed 229 * to point to characters in the same encoding as the string type, hence if this is a UTF16 string the 230 * pointer should point to UTF16 characters. 231 */ 232 pANTLR3_STRING (*newPtr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); 233 234 /** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed to 235 * point at 8 bit characters which must be converted on the fly to the encoding of the actual string. 236 */ 237 pANTLR3_STRING (*newPtr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); 238 239 /** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer is 240 * assumed to point to characters in the same encoding as the string itself, i.e. UTF16 if a UTF16 241 * string and so on. 242 */ 243 pANTLR3_STRING (*newStr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string); 244 245 /** Pointer to function that manufactures a string from a given pointer and length. The pointer should 246 * point to 8 bit characters regardless of the actual encoding of the string. The 8 bit characters 247 * will be converted to the actual string encoding on the fly. 248 */ 249 pANTLR3_STRING (*newStr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string); 250 251 /** Pointer to function that deletes the string altogether 252 */ 253 void (*destroy) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string); 254 255 /** Pointer to function that returns a copy of the string in printable form without any control 256 * characters in it. 257 */ 258 pANTLR3_STRING (*printable)(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string); 259 260 /** Pointer to function that closes the factory 261 */ 262 void (*close) (struct ANTLR3_STRING_FACTORY_struct * factory); 263 264 } 265 ANTLR3_STRING_FACTORY; 266 267 #ifdef __cplusplus 268 } 269 #endif 270 271 #endif 272 273