1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
11 Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13 Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net>
14 Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
15 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
16 Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
17 Licensed under the MIT license:
18
19 Permission is hereby granted, free of charge, to any person obtaining
20 a copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to permit
24 persons to whom the Software is furnished to do so, subject to the
25 following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
33 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
34 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
35 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
36 USE OR OTHER DEALINGS IN THE SOFTWARE.
37 */
38
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <string.h>
43 #include <fcntl.h>
44
45 #ifdef _WIN32
46 # include "winconfig.h"
47 #endif
48
49 #include <expat_config.h>
50
51 #include "expat.h"
52 #include "internal.h" /* for UNUSED_P only */
53 #include "xmlfile.h"
54 #include "xmltchar.h"
55 #include "filemap.h"
56
57 #if defined(_MSC_VER)
58 # include <io.h>
59 #endif
60
61 #ifdef HAVE_UNISTD_H
62 # include <unistd.h>
63 #endif
64
65 #ifndef O_BINARY
66 # ifdef _O_BINARY
67 # define O_BINARY _O_BINARY
68 # else
69 # define O_BINARY 0
70 # endif
71 #endif
72
73 #ifdef _DEBUG
74 # define READ_SIZE 16
75 #else
76 # define READ_SIZE (1024 * 8)
77 #endif
78
79 typedef struct {
80 XML_Parser parser;
81 int *retPtr;
82 } PROCESS_ARGS;
83
84 static int processStream(const XML_Char *filename, XML_Parser parser);
85
86 static void
reportError(XML_Parser parser,const XML_Char * filename)87 reportError(XML_Parser parser, const XML_Char *filename) {
88 enum XML_Error code = XML_GetErrorCode(parser);
89 const XML_Char *message = XML_ErrorString(code);
90 if (message)
91 ftprintf(stdout,
92 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
93 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
94 filename, XML_GetErrorLineNumber(parser),
95 XML_GetErrorColumnNumber(parser), message);
96 else
97 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
98 }
99
100 /* This implementation will give problems on files larger than INT_MAX. */
101 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)102 processFile(const void *data, size_t size, const XML_Char *filename,
103 void *args) {
104 XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
105 int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
106 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
107 reportError(parser, filename);
108 *retPtr = 0;
109 } else
110 *retPtr = 1;
111 }
112
113 #if defined(_WIN32)
114
115 static int
isAsciiLetter(XML_Char c)116 isAsciiLetter(XML_Char c) {
117 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
118 }
119
120 #endif /* _WIN32 */
121
122 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)123 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
124 XML_Char **toFree) {
125 XML_Char *s;
126 *toFree = 0;
127 if (! base || *systemId == T('/')
128 #if defined(_WIN32)
129 || *systemId == T('\\')
130 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
131 #endif
132 )
133 return systemId;
134 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
135 * sizeof(XML_Char));
136 if (! *toFree)
137 return systemId;
138 tcscpy(*toFree, base);
139 s = *toFree;
140 if (tcsrchr(s, T('/')))
141 s = tcsrchr(s, T('/')) + 1;
142 #if defined(_WIN32)
143 if (tcsrchr(s, T('\\')))
144 s = tcsrchr(s, T('\\')) + 1;
145 #endif
146 tcscpy(s, systemId);
147 return *toFree;
148 }
149
150 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)151 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
152 const XML_Char *base, const XML_Char *systemId,
153 const XML_Char *publicId) {
154 int result;
155 XML_Char *s;
156 const XML_Char *filename;
157 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
158 int filemapRes;
159 PROCESS_ARGS args;
160 UNUSED_P(publicId);
161 args.retPtr = &result;
162 args.parser = entParser;
163 filename = resolveSystemId(base, systemId, &s);
164 XML_SetBase(entParser, filename);
165 filemapRes = filemap(filename, processFile, &args);
166 switch (filemapRes) {
167 case 0:
168 result = 0;
169 break;
170 case 2:
171 ftprintf(stderr,
172 T("%s: file too large for memory-mapping")
173 T(", switching to streaming\n"),
174 filename);
175 result = processStream(filename, entParser);
176 break;
177 }
178 free(s);
179 XML_ParserFree(entParser);
180 return result;
181 }
182
183 static int
processStream(const XML_Char * filename,XML_Parser parser)184 processStream(const XML_Char *filename, XML_Parser parser) {
185 /* passing NULL for filename means read input from stdin */
186 int fd = 0; /* 0 is the fileno for stdin */
187
188 if (filename != NULL) {
189 fd = topen(filename, O_BINARY | O_RDONLY);
190 if (fd < 0) {
191 tperror(filename);
192 return 0;
193 }
194 }
195 for (;;) {
196 int nread;
197 char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
198 if (! buf) {
199 if (filename != NULL)
200 close(fd);
201 ftprintf(stderr, T("%s: out of memory\n"),
202 filename != NULL ? filename : T("xmlwf"));
203 return 0;
204 }
205 nread = read(fd, buf, READ_SIZE);
206 if (nread < 0) {
207 tperror(filename != NULL ? filename : T("STDIN"));
208 if (filename != NULL)
209 close(fd);
210 return 0;
211 }
212 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
213 reportError(parser, filename != NULL ? filename : T("STDIN"));
214 if (filename != NULL)
215 close(fd);
216 return 0;
217 }
218 if (nread == 0) {
219 if (filename != NULL)
220 close(fd);
221 break;
222 ;
223 }
224 }
225 return 1;
226 }
227
228 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)229 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
230 const XML_Char *base, const XML_Char *systemId,
231 const XML_Char *publicId) {
232 XML_Char *s;
233 const XML_Char *filename;
234 int ret;
235 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
236 UNUSED_P(publicId);
237 filename = resolveSystemId(base, systemId, &s);
238 XML_SetBase(entParser, filename);
239 ret = processStream(filename, entParser);
240 free(s);
241 XML_ParserFree(entParser);
242 return ret;
243 }
244
245 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)246 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
247 int result;
248
249 if (! XML_SetBase(parser, filename)) {
250 ftprintf(stderr, T("%s: out of memory"), filename);
251 exit(1);
252 }
253
254 if (flags & XML_EXTERNAL_ENTITIES)
255 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
256 ? externalEntityRefFilemap
257 : externalEntityRefStream);
258 if (flags & XML_MAP_FILE) {
259 int filemapRes;
260 PROCESS_ARGS args;
261 args.retPtr = &result;
262 args.parser = parser;
263 filemapRes = filemap(filename, processFile, &args);
264 switch (filemapRes) {
265 case 0:
266 result = 0;
267 break;
268 case 2:
269 ftprintf(stderr,
270 T("%s: file too large for memory-mapping")
271 T(", switching to streaming\n"),
272 filename);
273 result = processStream(filename, parser);
274 break;
275 }
276 } else
277 result = processStream(filename, parser);
278 return result;
279 }
280