• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************
2  * Licensed to the Apache Software Foundation (ASF) under one   *
3  * or more contributor license agreements.  See the NOTICE file *
4  * distributed with this work for additional information        *
5  * regarding copyright ownership.  The ASF licenses this file   *
6  * to you under the Apache License, Version 2.0 (the            *
7  * "License"); you may not use this file except in compliance   *
8  * with the License.  You may obtain a copy of the License at   *
9  *                                                              *
10  *   http://www.apache.org/licenses/LICENSE-2.0                 *
11  *                                                              *
12  * Unless required by applicable law or agreed to in writing,   *
13  * software distributed under the License is distributed on an  *
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15  * KIND, either express or implied.  See the License for the    *
16  * specific language governing permissions and limitations      *
17  * under the License.                                           *
18  ****************************************************************/
19 
20 package org.apache.james.mime4j;
21 
22 import com.android.emailcommon.utility.LoggingInputStream;
23 
24 import org.apache.james.mime4j.decoder.Base64InputStream;
25 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
26 
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.util.BitSet;
30 import java.util.LinkedList;
31 
32 /**
33  * <p>
34  * Parses MIME (or RFC822) message streams of bytes or characters and reports
35  * parsing events to a <code>ContentHandler</code> instance.
36  * </p>
37  * <p>
38  * Typical usage:<br/>
39  * <pre>
40  *      ContentHandler handler = new MyHandler();
41  *      MimeStreamParser parser = new MimeStreamParser();
42  *      parser.setContentHandler(handler);
43  *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
44  * </pre>
45  * <strong>NOTE:</strong> All lines must end with CRLF
46  * (<code>\r\n</code>). If you are unsure of the line endings in your stream
47  * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
48  *
49  *
50  * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
51  */
52 public class MimeStreamParser {
53     private static final Log log = LogFactory.getLog(MimeStreamParser.class);
54 
55     private static final boolean DEBUG_LOG_MESSAGE = false; //DO NOT RELEASE AS 'TRUE'
56 
57     private static BitSet fieldChars = null;
58 
59     private RootInputStream rootStream = null;
60     private LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>();
61     private ContentHandler handler = null;
62     private boolean raw = false;
63 
64     static {
65         fieldChars = new BitSet();
66         for (int i = 0x21; i <= 0x39; i++) {
67             fieldChars.set(i);
68         }
69         for (int i = 0x3b; i <= 0x7e; i++) {
70             fieldChars.set(i);
71         }
72     }
73 
74     /**
75      * Creates a new <code>MimeStreamParser</code> instance.
76      */
MimeStreamParser()77     public MimeStreamParser() {
78     }
79 
80     /**
81      * Parses a stream of bytes containing a MIME message.
82      *
83      * @param is the stream to parse.
84      * @throws IOException on I/O errors.
85      */
parse(InputStream is)86     public void parse(InputStream is) throws IOException {
87         if (DEBUG_LOG_MESSAGE) {
88             is = new LoggingInputStream(is, "MIME", true);
89         }
90         rootStream = new RootInputStream(is);
91         parseMessage(rootStream);
92     }
93 
94     /**
95      * Determines if this parser is currently in raw mode.
96      *
97      * @return <code>true</code> if in raw mode, <code>false</code>
98      *         otherwise.
99      * @see #setRaw(boolean)
100      */
isRaw()101     public boolean isRaw() {
102         return raw;
103     }
104 
105     /**
106      * Enables or disables raw mode. In raw mode all future entities
107      * (messages or body parts) in the stream will be reported to the
108      * {@link ContentHandler#raw(InputStream)} handler method only.
109      * The stream will contain the entire unparsed entity contents
110      * including header fields and whatever is in the body.
111      *
112      * @param raw <code>true</code> enables raw mode, <code>false</code>
113      *        disables it.
114      */
setRaw(boolean raw)115     public void setRaw(boolean raw) {
116         this.raw = raw;
117     }
118 
119     /**
120      * Finishes the parsing and stops reading lines.
121      * NOTE: No more lines will be parsed but the parser
122      * will still call
123      * {@link ContentHandler#endMultipart()},
124      * {@link ContentHandler#endBodyPart()},
125      * {@link ContentHandler#endMessage()}, etc to match previous calls
126      * to
127      * {@link ContentHandler#startMultipart(BodyDescriptor)},
128      * {@link ContentHandler#startBodyPart()},
129      * {@link ContentHandler#startMessage()}, etc.
130      */
stop()131     public void stop() {
132         rootStream.truncate();
133     }
134 
135     /**
136      * Parses an entity which consists of a header followed by a body containing
137      * arbitrary data, body parts or an embedded message.
138      *
139      * @param is the stream to parse.
140      * @throws IOException on I/O errors.
141      */
parseEntity(InputStream is)142     private void parseEntity(InputStream is) throws IOException {
143         BodyDescriptor bd = parseHeader(is);
144 
145         if (bd.isMultipart()) {
146             bodyDescriptors.addFirst(bd);
147 
148             handler.startMultipart(bd);
149 
150             MimeBoundaryInputStream tempIs =
151                 new MimeBoundaryInputStream(is, bd.getBoundary());
152             handler.preamble(new CloseShieldInputStream(tempIs));
153             tempIs.consume();
154 
155             while (tempIs.hasMoreParts()) {
156                 tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
157                 parseBodyPart(tempIs);
158                 tempIs.consume();
159                 if (tempIs.parentEOF()) {
160 //                    if (log.isWarnEnabled()) {
161 //                        log.warn("Line " + rootStream.getLineNumber()
162 //                                + ": Body part ended prematurely. "
163 //                                + "Higher level boundary detected or "
164 //                                + "EOF reached.");
165 //                    }
166                     break;
167                 }
168             }
169 
170             handler.epilogue(new CloseShieldInputStream(is));
171 
172             handler.endMultipart();
173 
174             bodyDescriptors.removeFirst();
175 
176         } else if (bd.isMessage()) {
177             if (bd.isBase64Encoded()) {
178                 log.warn("base64 encoded message/rfc822 detected");
179                 is = new EOLConvertingInputStream(
180                         new Base64InputStream(is));
181             } else if (bd.isQuotedPrintableEncoded()) {
182                 log.warn("quoted-printable encoded message/rfc822 detected");
183                 is = new EOLConvertingInputStream(
184                         new QuotedPrintableInputStream(is));
185             }
186             bodyDescriptors.addFirst(bd);
187             parseMessage(is);
188             bodyDescriptors.removeFirst();
189         } else {
190             handler.body(bd, new CloseShieldInputStream(is));
191         }
192 
193         /*
194          * Make sure the stream has been consumed.
195          */
196         while (is.read() != -1) {
197         }
198     }
199 
parseMessage(InputStream is)200     private void parseMessage(InputStream is) throws IOException {
201         if (raw) {
202             handler.raw(new CloseShieldInputStream(is));
203         } else {
204             handler.startMessage();
205             parseEntity(is);
206             handler.endMessage();
207         }
208     }
209 
parseBodyPart(InputStream is)210     private void parseBodyPart(InputStream is) throws IOException {
211         if (raw) {
212             handler.raw(new CloseShieldInputStream(is));
213         } else {
214             handler.startBodyPart();
215             parseEntity(is);
216             handler.endBodyPart();
217         }
218     }
219 
220     /**
221      * Parses a header.
222      *
223      * @param is the stream to parse.
224      * @return a <code>BodyDescriptor</code> describing the body following
225      *         the header.
226      */
parseHeader(InputStream is)227     private BodyDescriptor parseHeader(InputStream is) throws IOException {
228         BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
229                         ? null : (BodyDescriptor) bodyDescriptors.getFirst());
230 
231         handler.startHeader();
232 
233         int lineNumber = rootStream.getLineNumber();
234 
235         StringBuffer sb = new StringBuffer();
236         int curr = 0;
237         int prev = 0;
238         while ((curr = is.read()) != -1) {
239             if (curr == '\n' && (prev == '\n' || prev == 0)) {
240                 /*
241                  * [\r]\n[\r]\n or an immediate \r\n have been seen.
242                  */
243                 sb.deleteCharAt(sb.length() - 1);
244                 break;
245             }
246             sb.append((char) curr);
247             prev = curr == '\r' ? prev : curr;
248         }
249 
250 //        if (curr == -1 && log.isWarnEnabled()) {
251 //            log.warn("Line " + rootStream.getLineNumber()
252 //                    + ": Unexpected end of headers detected. "
253 //                    + "Boundary detected in header or EOF reached.");
254 //        }
255 
256         int start = 0;
257         int pos = 0;
258         int startLineNumber = lineNumber;
259         while (pos < sb.length()) {
260             while (pos < sb.length() && sb.charAt(pos) != '\r') {
261                 pos++;
262             }
263             if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
264                 pos++;
265                 continue;
266             }
267 
268             if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
269 
270                 /*
271                  * field should be the complete field data excluding the
272                  * trailing \r\n.
273                  */
274                 String field = sb.substring(start, pos);
275                 start = pos + 2;
276 
277                 /*
278                  * Check for a valid field.
279                  */
280                 int index = field.indexOf(':');
281                 boolean valid = false;
282                 if (index != -1 && fieldChars.get(field.charAt(0))) {
283                     valid = true;
284                     String fieldName = field.substring(0, index).trim();
285                     for (int i = 0; i < fieldName.length(); i++) {
286                         if (!fieldChars.get(fieldName.charAt(i))) {
287                             valid = false;
288                             break;
289                         }
290                     }
291 
292                     if (valid) {
293                         handler.field(field);
294                         bd.addField(fieldName, field.substring(index + 1));
295                     }
296                 }
297 
298                 if (!valid && log.isWarnEnabled()) {
299                     log.warn("Line " + startLineNumber
300                             + ": Ignoring invalid field: '" + field.trim() + "'");
301                 }
302 
303                 startLineNumber = lineNumber;
304             }
305 
306             pos += 2;
307             lineNumber++;
308         }
309 
310         handler.endHeader();
311 
312         return bd;
313     }
314 
315     /**
316      * Sets the <code>ContentHandler</code> to use when reporting
317      * parsing events.
318      *
319      * @param h the <code>ContentHandler</code>.
320      */
setContentHandler(ContentHandler h)321     public void setContentHandler(ContentHandler h) {
322         this.handler = h;
323     }
324 
325 }
326