1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uparse.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000apr18 14 * created by: Markus W. Scherer 15 * 16 * This file provides a parser for files that are delimited by one single 17 * character like ';' or TAB. Example: the Unicode Character Properties files 18 * like UnicodeData.txt are semicolon-delimited. 19 */ 20 21 #ifndef __UPARSE_H__ 22 #define __UPARSE_H__ 23 24 #include "unicode/utypes.h" 25 26 /** 27 * Is c an invariant-character whitespace? 28 * @param c invariant character 29 */ 30 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') 31 32 U_CDECL_BEGIN 33 34 /** 35 * Skip space ' ' and TAB '\t' characters. 36 * 37 * @param s Pointer to characters. 38 * @return Pointer to first character at or after s that is not a space or TAB. 39 */ 40 U_CAPI const char * U_EXPORT2 41 u_skipWhitespace(const char *s); 42 43 /** 44 * Trim whitespace (including line endings) from the end of the string. 45 * 46 * @param s Pointer to the string. 47 * @return Pointer to the new end of the string. 48 */ 49 U_CAPI char * U_EXPORT2 50 u_rtrim(char *s); 51 52 /** Function type for u_parseDelimitedFile(). */ 53 typedef void U_CALLCONV 54 UParseLineFn(void *context, 55 char *fields[][2], 56 int32_t fieldCount, 57 UErrorCode *pErrorCode); 58 59 /** 60 * Parser for files that are similar to UnicodeData.txt: 61 * This function opens the file and reads it line by line. It skips empty lines 62 * and comment lines that start with a '#'. 63 * All other lines are separated into fields with one delimiter character 64 * (semicolon for Unicode Properties files) between two fields. The last field in 65 * a line does not need to be terminated with a delimiter. 66 * 67 * For each line, after segmenting it, a line function is called. 68 * It gets passed the array of field start and limit pointers that is 69 * passed into this parser and filled by it for each line. 70 * For each field i of the line, the start pointer in fields[i][0] 71 * points to the beginning of the field, while the limit pointer in fields[i][1] 72 * points behind the field, i.e., to the delimiter or the line end. 73 * 74 * The context parameter of the line function is 75 * the same as the one for the parse function. 76 * 77 * The line function may modify the contents of the fields including the 78 * limit characters. 79 * 80 * If the file cannot be opened, or there is a parsing error or a field function 81 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. 82 */ 83 U_CAPI void U_EXPORT2 84 u_parseDelimitedFile(const char *filename, char delimiter, 85 char *fields[][2], int32_t fieldCount, 86 UParseLineFn *lineFn, void *context, 87 UErrorCode *pErrorCode); 88 89 /** 90 * Parse a string of code points like 0061 0308 0300. 91 * s must end with either ';' or NUL. 92 * 93 * @return Number of code points. 94 */ 95 U_CAPI int32_t U_EXPORT2 96 u_parseCodePoints(const char *s, 97 uint32_t *dest, int32_t destCapacity, 98 UErrorCode *pErrorCode); 99 100 /** 101 * Parse a list of code points like 0061 0308 0300 102 * into a UChar * string. 103 * s must end with either ';' or NUL. 104 * 105 * Set the first code point in *pFirst. 106 * 107 * @param s Input char * string. 108 * @param dest Output string buffer. 109 * @param destCapacity Capacity of dest in numbers of UChars. 110 * @param pFirst If pFirst!=NULL the *pFirst will be set to the first 111 * code point in the string. 112 * @param pErrorCode ICU error code. 113 * @return The length of the string in numbers of UChars. 114 */ 115 U_CAPI int32_t U_EXPORT2 116 u_parseString(const char *s, 117 UChar *dest, int32_t destCapacity, 118 uint32_t *pFirst, 119 UErrorCode *pErrorCode); 120 121 /** 122 * Parse a code point range like 123 * 0085 or 124 * 4E00..9FA5. 125 * 126 * s must contain such a range and end with either ';' or NUL. 127 * 128 * @return Length of code point range, end-start+1 129 */ 130 U_CAPI int32_t U_EXPORT2 131 u_parseCodePointRange(const char *s, 132 uint32_t *pStart, uint32_t *pEnd, 133 UErrorCode *pErrorCode); 134 135 /** 136 * Same as u_parseCodePointRange() but the range may be terminated by 137 * any character. The position of the terminating character is returned via 138 * the *terminator output parameter. 139 */ 140 U_CAPI int32_t U_EXPORT2 141 u_parseCodePointRangeAnyTerminator(const char *s, 142 uint32_t *pStart, uint32_t *pEnd, 143 const char **terminator, 144 UErrorCode *pErrorCode); 145 146 U_CAPI int32_t U_EXPORT2 147 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); 148 149 U_CDECL_END 150 151 #endif 152