• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File ucbuf.h
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   05/10/01    Ram         Creation.
15 *
16 * This API reads in files and returns UChars
17 *******************************************************************************
18 */
19 
20 #include "unicode/ucnv.h"
21 #include "filestrm.h"
22 
23 #if !UCONFIG_NO_CONVERSION
24 
25 #ifndef UCBUF_H
26 #define UCBUF_H 1
27 
/**
 * Opaque buffer handle: this header only forward-declares the struct, so callers
 * hold UCHARBUF pointers and pass them to the ucbuf_* functions declared below.
 */
28 typedef struct UCHARBUF UCHARBUF;
29 /**
30  * End of file value.
 * NOTE(review): the literal has all 32 bits set, while the ucbuf_getc* readers
 * declared in this header return int32_t; presumably they return this value at
 * end of input, so compare results against the macro itself - confirm in ucbuf.c.
31  */
32 #define U_EOF 0xFFFFFFFF
33 /**
34  * Error value if a sequence cannot be unescaped.
 * NOTE(review): presumably produced by ucbuf_getcx32, the only reader declared in
 * this header that is documented as unescaping - confirm in ucbuf.c.
35  */
36 #define U_ERR 0xFFFFFFFE
37 
38 typedef struct ULine ULine;
39 
/**
 * Pairs a UChar pointer with a 32-bit length.
 * NOTE(review): presumably describes one line of text, with len counting the
 * UChars that name points to; none of the declarations visible in this header
 * take or return a ULine - confirm the intended usage.
 */
40 struct  ULine {
41     UChar     *name;   /* pointer to UChar data; ownership is not stated in this header */
42     int32_t   len;     /* presumably the length of name in UChars - TODO confirm */
43 };
44 
45 /**
46  * Opens a UCHARBUF on the named file, using the given code page for conversion
47  * @param fileName  Name of the file to open.
48  * @param codepage  The encoding of the file stream to convert to Unicode.
49  *                  If *codepage is NULL on input the API will try to autodetect
50  *                  popular Unicode encodings
51  * @param showWarning Flag to print out warnings to STDOUT
52  * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
53  *                  the whole file into memory and converts it.
54  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
55  *        indicates a failure on entry, the function will immediately return.
56  *        On exit the value will indicate the success of the operation.
57  * @return pointer to the newly opened UCHARBUF; release it with ucbuf_close().
 *         NOTE(review): the value returned on failure is not stated here
 *         (presumably NULL) - confirm in ucbuf.c.
58  */
59 U_CAPI UCHARBUF* U_EXPORT2
60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
61 
62 /**
63  * Gets a UTF-16 code unit at the current position from the converted buffer
64  * and increments the current position
65  * @param buf Pointer to UCHARBUF structure
66  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
67  *        indicates a failure on entry, the function will immediately return.
68  *        On exit the value will indicate the success of the operation.
 * @return NOTE(review): not documented in this header; presumably the code unit
 *         that was read, or U_EOF at end of input - confirm in ucbuf.c.
69  */
70 U_CAPI int32_t U_EXPORT2
71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
72 
73 /**
74  * Gets a UTF-32 code point at the current position from the converted buffer
75  * and increments the current position
76  * @param buf Pointer to UCHARBUF structure
77  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
78  *        indicates a failure on entry, the function will immediately return.
79  *        On exit the value will indicate the success of the operation.
 * @return NOTE(review): not documented in this header; presumably the code point
 *         that was read, or U_EOF at end of input - confirm in ucbuf.c.
80  */
81 U_CAPI int32_t U_EXPORT2
82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
83 
84 /**
85  * Gets a UTF-16 code unit at the current position from the converted buffer after
86  * unescaping and increments the current position. If the escape sequence is for a UTF-32
87  * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
88  * @param buf Pointer to UCHARBUF structure
89  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
90  *        indicates a failure on entry, the function will immediately return.
91  *        On exit the value will indicate the success of the operation.
 * @return The unescaped code unit or code point, as described above.
 *         NOTE(review): presumably U_EOF at end of input and U_ERR when an escape
 *         sequence cannot be unescaped - confirm in ucbuf.c.
92  */
93 U_CAPI int32_t U_EXPORT2
94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
95 
96 /**
97  * Gets a pointer to the current position in the internal buffer and the length of the line.
98  * It is imperative to make a copy of the returned buffer before performing operations on it.
99  * @param buf Pointer to UCHARBUF structure
100  * @param len Output param to receive the length of the returned buffer up to the end of the line
101  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
102  *        indicates a failure on entry, the function will immediately return.
103  *        On exit the value will indicate the success of the operation.
104  *        Error: U_TRUNCATED_CHAR_FOUND
105  * @return Pointer to the internal buffer, NULL if EOF
106  */
107 U_CAPI const UChar* U_EXPORT2
108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
109 
110 
111 /**
112  * Resets the buffers and the underlying file stream.
113  * The function returns nothing; failures are reported only through err.
 * @param buf Pointer to UCHARBUF structure
114  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
115  *        indicates a failure on entry, the function will immediately return.
116  *        On exit the value will indicate the success of the operation.
117  */
118 U_CAPI void U_EXPORT2
119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
120 
121 /**
122  * Returns a pointer to the internal converted buffer
123  * @param buf Pointer to UCHARBUF structure
124  * @param len Pointer to int32_t to receive the length of the buffer
125  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
126  *        indicates a failure on entry, the function will immediately return.
127  *        On exit the value will indicate the success of the operation.
128  * @return Pointer to internal UChar buffer.
 *         NOTE(review): the pointer refers to storage owned by buf, so it presumably
 *         stays valid only until buf is modified or closed - confirm in ucbuf.c.
129  */
130 U_CAPI const UChar* U_EXPORT2
131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
132 
133 /**
134  * Closes the UCHARBUF structure members and cleans up the malloc'ed memory.
 * This function is also named as the close function of LocalUCHARBUFPointer in the
 * C++ section of this header.
135  * @param buf Pointer to UCHARBUF structure
136  */
137 U_CAPI void U_EXPORT2
138 ucbuf_close(UCHARBUF* buf);
139 
140 #if U_SHOW_CPLUSPLUS_API
141 
142 U_NAMESPACE_BEGIN
143 
/**
 * Defines the C++ type LocalUCHARBUFPointer for UCHARBUF, with ucbuf_close as its
 * close function.
 * NOTE(review): presumably a scoped owner that calls ucbuf_close automatically when
 * it goes out of scope - confirm against U_DEFINE_LOCAL_OPEN_POINTER in
 * unicode/localpointer.h.
 */
144 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
145 
146 U_NAMESPACE_END
147 
148 #endif
149 
150 /**
151  * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
 * @param ungetChar NOTE(review): presumably the code point most recently read from
 *        buf that is to be pushed back - confirm in ucbuf.c.
 * @param buf Pointer to UCHARBUF structure
152  */
153 U_CAPI void U_EXPORT2
154 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
155 
156 
157 /**
158  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
159  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
160  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
161  * is necessary.
162  * If the charset was autodetected, the caller must close both the input FileStream
163  * and the converter.
164  *
165  * @param fileName The file name to be opened and encoding autodetected
166  * @param cp Output param to receive the detected encoding
167  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
 * @param signatureLength NOTE(review): undocumented output param; presumably receives
 *        the length in bytes of the detected signature - confirm in ucbuf.c.
168  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
169  *        indicates a failure on entry, the function will immediately return.
170  *        On exit the value will indicate the success of the operation.
171  * @return The input FileStream if its charset was autodetected; NULL otherwise.
172  */
173 U_CAPI FileStream * U_EXPORT2
174 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
175 int32_t* signatureLength, UErrorCode* status);
176 
177 /**
178  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
179  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
180  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
181  * is necessary.
182  * If the charset was autodetected, the caller must close the converter.
183  *
184  * @param in The file stream whose encoding is to be detected
185  * @param cp Output param to receive the detected encoding
186  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
 * @param signatureLength NOTE(review): undocumented output param; presumably receives
 *        the length in bytes of the detected signature - confirm in ucbuf.c.
187  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
188  *        indicates a failure on entry, the function will immediately return.
189  *        On exit the value will indicate the success of the operation.
190  * @return Boolean whether the Unicode charset was autodetected.
191  */
192 
193 U_CAPI UBool U_EXPORT2
194 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
195 
196 /**
197  * Returns the approximate size in UChars required for converting the file to UChars
 * @param buf Pointer to UCHARBUF structure
 * @return The approximate size, in UChars
198  */
199 U_CAPI int32_t U_EXPORT2
200 ucbuf_size(UCHARBUF* buf);
201 
/**
 * NOTE(review): this function is undocumented in this header; everything below is
 * inferred from the parameter names and must be confirmed against ucbuf.c.
 * Presumably combines inputDir and fileName into a path written to target.
 * @param inputDir presumably the directory against which fileName is resolved
 * @param fileName presumably the file name to resolve
 * @param target   caller-supplied char buffer, presumably receiving the resolved name
 * @param len      in/out candidate: presumably the capacity of target on input and
 *                 the required or written length on output
 * @param status   pointer to a UErrorCode; presumably follows the same entry/exit
 *                 convention as the err parameters elsewhere in this header
 * @return a const char pointer, presumably to the resolved file name
 */
202 U_CAPI const char* U_EXPORT2
203 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
204 
205 #endif
206 #endif
207 
208