1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
11 Copyright (c) 2016-2018 Sebastian Pipping <sebastian@pipping.org>
12 Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
13 Licensed under the MIT license:
14
15 Permission is hereby granted, free of charge, to any person obtaining
16 a copy of this software and associated documentation files (the
17 "Software"), to deal in the Software without restriction, including
18 without limitation the rights to use, copy, modify, merge, publish,
19 distribute, sublicense, and/or sell copies of the Software, and to permit
20 persons to whom the Software is furnished to do so, subject to the
21 following conditions:
22
23 The above copyright notice and this permission notice shall be included
24 in all copies or substantial portions of the Software.
25
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
29 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
30 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
31 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
32 USE OR OTHER DEALINGS IN THE SOFTWARE.
33 */
34
35 #include <string.h>
36 #include "xmlmime.h"
37
/* Scan the next token of a MIME header value.

   On entry *pp points at the text still to be scanned.  Whitespace and
   parenthesised comments (which may nest) are skipped.  Three kinds of
   token are recognised:

     - one of the single characters ';', '/' or '=';
     - a quoted string, returned with both of its double quotes;
     - an atom: any other run of characters, ended by whitespace, '(',
       '"', ';', '/', '=' or the end of the input.

   A backslash makes the scanner step over the following character.

   Returns a pointer to the first character of the token; *pp is then left
   just past the token (for an atom, on the character that ended it).
   Returns NULL when no token is available: the input ended (including
   inside a string or comment), a backslash was the last character, or a
   ')' appeared outside of any comment or string. */
static const char *
getTok(const char **pp) {
  /* Values above inComment encode deeper nesting: inComment is one level
     of comment, inComment + 1 is two levels, and so on. */
  enum { inAtom, inString, init, inComment };
  int mode = init;
  const char *begin = NULL;

  for (;; ++*pp) {
    const char c = **pp;

    if (c == '\0') {
      /* Only an atom may be ended by the end of the input. */
      return (mode == inAtom) ? begin : NULL;
    }

    if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
      if (mode == inAtom) {
        return begin;
      }
      continue;
    }

    if (c == '(') {
      if (mode == inAtom) {
        return begin;
      }
      if (mode != inString) {
        ++mode; /* open a comment, or nest one level deeper */
      }
      continue;
    }

    if (c == ')') {
      if (mode > init) {
        --mode; /* leave one level of comment */
      } else if (mode != inString) {
        return NULL; /* ')' without a matching '(' */
      }
      continue;
    }

    if (c == ';' || c == '/' || c == '=') {
      if (mode == inAtom) {
        return begin;
      }
      if (mode == init) {
        return (*pp)++; /* the character is a token by itself */
      }
      continue;
    }

    if (c == '\\') {
      ++*pp; /* step onto the escaped character; the loop steps past it */
      if (**pp == '\0') {
        return NULL;
      }
      continue;
    }

    if (c == '"') {
      if (mode == inString) {
        ++*pp; /* the closing quote belongs to the token */
        return begin;
      }
      if (mode == inAtom) {
        return begin;
      }
      if (mode == init) {
        begin = *pp;
        mode = inString;
      }
      continue;
    }

    /* Any other character starts an atom unless one is already open. */
    if (mode == init) {
      begin = *pp;
      mode = inAtom;
    }
  }
  /* not reached */
}
106
/* Compare the token [start, end) with key, ignoring ASCII case.

   key must be a NUL-terminated string of lowercase ASCII letters.  A token
   character matches a key character when it is that letter or its
   upper-case form.

   Returns 1 when the token has the same length as key and every character
   matches, 0 otherwise.  A NULL start (no token) never matches. */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than the key.  Stop here: evaluating the case
       fold below for the terminator yields 'A' - 'a', which a signed char
       holding 0xE0 would equal, and key would then run past its end. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    /* Only letters have an upper-case counterpart. */
    if (*key < 'a' || *key > 'z' || *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}
118
119 void
getXMLCharset(const char * buf,char * charset)120 getXMLCharset(const char *buf, char *charset) {
121 const char *next, *p;
122
123 charset[0] = '\0';
124 next = buf;
125 p = getTok(&next);
126 if (matchkey(p, next, "text"))
127 strcpy(charset, "us-ascii");
128 else if (! matchkey(p, next, "application"))
129 return;
130 p = getTok(&next);
131 if (! p || *p != '/')
132 return;
133 p = getTok(&next);
134 /* BEGIN disabled code */
135 if (0) {
136 if (! matchkey(p, next, "xml") && charset[0] == '\0')
137 return;
138 }
139 /* END disabled code */
140 p = getTok(&next);
141 while (p) {
142 if (*p == ';') {
143 p = getTok(&next);
144 if (matchkey(p, next, "charset")) {
145 p = getTok(&next);
146 if (p && *p == '=') {
147 p = getTok(&next);
148 if (p) {
149 char *s = charset;
150 if (*p == '"') {
151 while (++p != next - 1) {
152 if (*p == '\\')
153 ++p;
154 if (s == charset + CHARSET_MAX - 1) {
155 charset[0] = '\0';
156 break;
157 }
158 *s++ = *p;
159 }
160 *s++ = '\0';
161 } else {
162 if (next - p > CHARSET_MAX - 1)
163 break;
164 while (p != next)
165 *s++ = *p++;
166 *s = 0;
167 break;
168 }
169 }
170 }
171 break;
172 }
173 } else
174 p = getTok(&next);
175 }
176 }
177
178 #ifdef TEST
179
180 # include <stdio.h>
181
182 int
main(int argc,char * argv[])183 main(int argc, char *argv[]) {
184 char buf[CHARSET_MAX];
185 if (argc <= 1)
186 return 1;
187 printf("%s\n", argv[1]);
188 getXMLCharset(argv[1], buf);
189 printf("charset=\"%s\"\n", buf);
190 return 0;
191 }
192
193 #endif /* TEST */
194