• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11    Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
13    Copyright (c) 2005-2007 Steven Solie <ssolie@users.sourceforge.net>
14    Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
15    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
16    Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
17    Licensed under the MIT license:
18 
19    Permission is  hereby granted,  free of charge,  to any  person obtaining
20    a  copy  of  this  software   and  associated  documentation  files  (the
21    "Software"),  to  deal in  the  Software  without restriction,  including
22    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
23    distribute, sublicense, and/or sell copies of the Software, and to permit
24    persons  to whom  the Software  is  furnished to  do so,  subject to  the
25    following conditions:
26 
27    The above copyright  notice and this permission notice  shall be included
28    in all copies or substantial portions of the Software.
29 
30    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
31    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
32    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
33    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
34    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
35    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
36    USE OR OTHER DEALINGS IN THE SOFTWARE.
37 */
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <string.h>
43 #include <fcntl.h>
44 
45 #ifdef _WIN32
46 #  include "winconfig.h"
47 #endif
48 
49 #include <expat_config.h>
50 
51 #include "expat.h"
52 #include "internal.h" /* for UNUSED_P only */
53 #include "xmlfile.h"
54 #include "xmltchar.h"
55 #include "filemap.h"
56 
57 #if defined(_MSC_VER)
58 #  include <io.h>
59 #endif
60 
61 #ifdef HAVE_UNISTD_H
62 #  include <unistd.h>
63 #endif
64 
65 #ifndef O_BINARY
66 #  ifdef _O_BINARY
67 #    define O_BINARY _O_BINARY
68 #  else
69 #    define O_BINARY 0
70 #  endif
71 #endif
72 
73 #ifdef _DEBUG
74 #  define READ_SIZE 16
75 #else
76 #  define READ_SIZE (1024 * 8)
77 #endif
78 
79 typedef struct {
80   XML_Parser parser;
81   int *retPtr;
82 } PROCESS_ARGS;
83 
84 static int processStream(const XML_Char *filename, XML_Parser parser);
85 
86 static void
reportError(XML_Parser parser,const XML_Char * filename)87 reportError(XML_Parser parser, const XML_Char *filename) {
88   enum XML_Error code = XML_GetErrorCode(parser);
89   const XML_Char *message = XML_ErrorString(code);
90   if (message)
91     ftprintf(stdout,
92              T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
93                  T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
94              filename, XML_GetErrorLineNumber(parser),
95              XML_GetErrorColumnNumber(parser), message);
96   else
97     ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
98 }
99 
100 /* This implementation will give problems on files larger than INT_MAX. */
101 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)102 processFile(const void *data, size_t size, const XML_Char *filename,
103             void *args) {
104   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
105   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
106   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
107     reportError(parser, filename);
108     *retPtr = 0;
109   } else
110     *retPtr = 1;
111 }
112 
113 #if defined(_WIN32)
114 
115 static int
isAsciiLetter(XML_Char c)116 isAsciiLetter(XML_Char c) {
117   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
118 }
119 
120 #endif /* _WIN32 */
121 
122 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)123 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
124                 XML_Char **toFree) {
125   XML_Char *s;
126   *toFree = 0;
127   if (! base || *systemId == T('/')
128 #if defined(_WIN32)
129       || *systemId == T('\\')
130       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
131 #endif
132   )
133     return systemId;
134   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
135                                * sizeof(XML_Char));
136   if (! *toFree)
137     return systemId;
138   tcscpy(*toFree, base);
139   s = *toFree;
140   if (tcsrchr(s, T('/')))
141     s = tcsrchr(s, T('/')) + 1;
142 #if defined(_WIN32)
143   if (tcsrchr(s, T('\\')))
144     s = tcsrchr(s, T('\\')) + 1;
145 #endif
146   tcscpy(s, systemId);
147   return *toFree;
148 }
149 
150 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)151 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
152                          const XML_Char *base, const XML_Char *systemId,
153                          const XML_Char *publicId) {
154   int result;
155   XML_Char *s;
156   const XML_Char *filename;
157   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
158   int filemapRes;
159   PROCESS_ARGS args;
160   UNUSED_P(publicId);
161   args.retPtr = &result;
162   args.parser = entParser;
163   filename = resolveSystemId(base, systemId, &s);
164   XML_SetBase(entParser, filename);
165   filemapRes = filemap(filename, processFile, &args);
166   switch (filemapRes) {
167   case 0:
168     result = 0;
169     break;
170   case 2:
171     ftprintf(stderr,
172              T("%s: file too large for memory-mapping")
173                  T(", switching to streaming\n"),
174              filename);
175     result = processStream(filename, entParser);
176     break;
177   }
178   free(s);
179   XML_ParserFree(entParser);
180   return result;
181 }
182 
183 static int
processStream(const XML_Char * filename,XML_Parser parser)184 processStream(const XML_Char *filename, XML_Parser parser) {
185   /* passing NULL for filename means read input from stdin */
186   int fd = 0; /* 0 is the fileno for stdin */
187 
188   if (filename != NULL) {
189     fd = topen(filename, O_BINARY | O_RDONLY);
190     if (fd < 0) {
191       tperror(filename);
192       return 0;
193     }
194   }
195   for (;;) {
196     int nread;
197     char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
198     if (! buf) {
199       if (filename != NULL)
200         close(fd);
201       ftprintf(stderr, T("%s: out of memory\n"),
202                filename != NULL ? filename : T("xmlwf"));
203       return 0;
204     }
205     nread = read(fd, buf, READ_SIZE);
206     if (nread < 0) {
207       tperror(filename != NULL ? filename : T("STDIN"));
208       if (filename != NULL)
209         close(fd);
210       return 0;
211     }
212     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
213       reportError(parser, filename != NULL ? filename : T("STDIN"));
214       if (filename != NULL)
215         close(fd);
216       return 0;
217     }
218     if (nread == 0) {
219       if (filename != NULL)
220         close(fd);
221       break;
222       ;
223     }
224   }
225   return 1;
226 }
227 
228 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)229 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
230                         const XML_Char *base, const XML_Char *systemId,
231                         const XML_Char *publicId) {
232   XML_Char *s;
233   const XML_Char *filename;
234   int ret;
235   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
236   UNUSED_P(publicId);
237   filename = resolveSystemId(base, systemId, &s);
238   XML_SetBase(entParser, filename);
239   ret = processStream(filename, entParser);
240   free(s);
241   XML_ParserFree(entParser);
242   return ret;
243 }
244 
245 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)246 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
247   int result;
248 
249   if (! XML_SetBase(parser, filename)) {
250     ftprintf(stderr, T("%s: out of memory"), filename);
251     exit(1);
252   }
253 
254   if (flags & XML_EXTERNAL_ENTITIES)
255     XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
256                                                 ? externalEntityRefFilemap
257                                                 : externalEntityRefStream);
258   if (flags & XML_MAP_FILE) {
259     int filemapRes;
260     PROCESS_ARGS args;
261     args.retPtr = &result;
262     args.parser = parser;
263     filemapRes = filemap(filename, processFile, &args);
264     switch (filemapRes) {
265     case 0:
266       result = 0;
267       break;
268     case 2:
269       ftprintf(stderr,
270                T("%s: file too large for memory-mapping")
271                    T(", switching to streaming\n"),
272                filename);
273       result = processStream(filename, parser);
274       break;
275     }
276   } else
277     result = processStream(filename, parser);
278   return result;
279 }
280