/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/

package org.apache.james.mime4j.decoder;

import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Performs Quoted-Printable decoding on an underlying stream.
 * <p>
 * Decoding rules implemented by this class:
 * <ul>
 *   <li><code>=XX</code> (two hexadecimal digits, either case) is decoded
 *       to the corresponding octet;</li>
 *   <li><code>=\r\n</code> (soft line break) is removed;</li>
 *   <li>space and tab characters immediately preceding CR, LF or the end
 *       of the stream are discarded;</li>
 *   <li>malformed escape sequences are logged at warn level and passed
 *       through literally.</li>
 * </ul>
 * Note that an incomplete escape at the very end of the stream (a trailing
 * <code>=</code> or <code>=X</code>) is discarded without being emitted.
 *
 * @version $Id: QuotedPrintableInputStream.java,v 1.3 2004/11/29 13:15:47 ntherning Exp $
 */
public class QuotedPrintableInputStream extends InputStream {
    private static final Log log = LogFactory.getLog(QuotedPrintableInputStream.class);

    /** No escape sequence is pending. */
    private static final byte STATE_START = 0;
    /** An "=" has been consumed. */
    private static final byte STATE_EQUALS = 1;
    /** "=\r" has been consumed. */
    private static final byte STATE_EQUALS_CR = 2;
    /** "=" followed by one hexadecimal digit has been consumed. */
    private static final byte STATE_FIRST_DIGIT = 3;

    private final InputStream stream;
    ByteQueue byteq = new ByteQueue();
    ByteQueue pushbackq = new ByteQueue();
    private byte state = STATE_START;

    /*
     * First hexadecimal digit of the escape being decoded. Kept as a field
     * (like 'state') so that it is not lost if fillBuffer() returns while
     * an escape is half-read and is later invoked again.
     */
    private byte msdChar = 0;

    /**
     * Creates a stream which decodes the Quoted-Printable data read from
     * the given stream.
     *
     * @param stream the stream to read the encoded data from.
     */
    public QuotedPrintableInputStream(InputStream stream) {
        this.stream = stream;
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException on I/O errors.
     */
    public void close() throws IOException {
        stream.close();
    }

    /**
     * Reads the next decoded byte.
     *
     * @return the decoded byte as a value in the range 0-255, or
     *         <code>-1</code> if no more decoded data is available.
     * @throws IOException Underlying stream threw IOException.
     */
    public int read() throws IOException {
        fillBuffer();
        if (byteq.count() == 0) {
            return -1;
        }
        // Mask so that bytes >= 0x80 are not returned as negative values.
        return byteq.dequeue() & 0xFF;
    }

    /**
     * Pulls bytes out of the underlying stream and places them in the
     * pushback queue.  This is necessary (vs. reading from the
     * underlying stream directly) to detect and filter out "transport
     * padding" whitespace, i.e., all whitespace that appears immediately
     * before a CRLF.
     * <p>
     * Does nothing if the pushback queue is not empty. Otherwise reads
     * until a byte other than space or tab has been seen (or the end of
     * the stream is reached).
     *
     * @throws IOException Underlying stream threw IOException.
     */
    private void populatePushbackQueue() throws IOException {
        if (pushbackq.count() != 0) {
            return;
        }

        while (true) {
            int i = stream.read();
            switch (i) {
                case -1:
                    // stream is done
                    pushbackq.clear(); // discard any whitespace preceding EOF
                    return;
                case ' ':
                case '\t':
                    // Might be transport padding; hold on to it until we
                    // know what follows.
                    pushbackq.enqueue((byte) i);
                    break;
                case '\r':
                case '\n':
                    pushbackq.clear(); // discard any whitespace preceding EOL
                    pushbackq.enqueue((byte) i);
                    return;
                default:
                    // Preceding whitespace (if any) is significant; keep it.
                    pushbackq.enqueue((byte) i);
                    return;
            }
        }
    }

    /**
     * Causes the pushback queue to get populated if it is empty, then
     * consumes and decodes bytes out of it until one or more bytes are
     * in the byte queue.  This decoding step performs the actual QP
     * decoding.
     * <p>
     * Returns with an empty byte queue only when the underlying stream
     * has no more data to offer.
     *
     * @throws IOException Underlying stream threw IOException.
     */
    private void fillBuffer() throws IOException {
        while (byteq.count() == 0) {
            if (pushbackq.count() == 0) {
                populatePushbackQueue();
                if (pushbackq.count() == 0) {
                    return;
                }
            }

            byte b = (byte) pushbackq.dequeue();

            switch (state) {
                case STATE_START:
                    if (b == '=') {
                        state = STATE_EQUALS;
                    } else {
                        byteq.enqueue(b); // literal byte; state is unchanged
                    }
                    break;

                case STATE_EQUALS:
                    if (b == '\r') {
                        state = STATE_EQUALS_CR;
                    } else if (isHexDigit(b)) {
                        state = STATE_FIRST_DIGIT;
                        msdChar = b; // save until next digit encountered
                    } else if (b == '=') {
                        /*
                         * Special case when == is encountered.
                         * Emit one = and stay in this state.
                         */
                        if (log.isWarnEnabled()) {
                            log.warn("Malformed MIME; got ==");
                        }
                        byteq.enqueue((byte) '=');
                    } else {
                        if (log.isWarnEnabled()) {
                            log.warn("Malformed MIME; expected \\r or "
                                    + "[0-9A-Fa-f], got " + b);
                        }
                        // Not an escape after all; pass it through literally.
                        state = STATE_START;
                        byteq.enqueue((byte) '=');
                        byteq.enqueue(b);
                    }
                    break;

                case STATE_EQUALS_CR:
                    if (b == '\n') {
                        // Soft line break complete; it produces no output.
                        state = STATE_START;
                    } else {
                        if (log.isWarnEnabled()) {
                            log.warn("Malformed MIME; expected "
                                    + (int) '\n' + ", got " + b);
                        }
                        state = STATE_START;
                        byteq.enqueue((byte) '=');
                        byteq.enqueue((byte) '\r');
                        byteq.enqueue(b);
                    }
                    break;

                case STATE_FIRST_DIGIT:
                    if (isHexDigit(b)) {
                        byte msd = asciiCharToNumericValue(msdChar);
                        byte low = asciiCharToNumericValue(b);
                        state = STATE_START;
                        byteq.enqueue((byte) ((msd << 4) | low));
                    } else {
                        if (log.isWarnEnabled()) {
                            log.warn("Malformed MIME; expected "
                                    + "[0-9A-Fa-f], got " + b);
                        }
                        // Incomplete escape; pass all three bytes through.
                        state = STATE_START;
                        byteq.enqueue((byte) '=');
                        byteq.enqueue(msdChar);
                        byteq.enqueue(b);
                    }
                    break;

                default: // should never happen
                    log.error("Illegal state: " + state);
                    state = STATE_START;
                    byteq.enqueue(b);
                    break;
            }
        }
    }

    /**
     * Tells whether the given byte is an ASCII hexadecimal digit
     * (<code>0-9</code>, <code>A-F</code> or <code>a-f</code>).
     *
     * @param b the byte to test.
     * @return <code>true</code> if <code>b</code> is a hexadecimal digit.
     */
    private static boolean isHexDigit(byte b) {
        return (b >= '0' && b <= '9')
                || (b >= 'A' && b <= 'F')
                || (b >= 'a' && b <= 'f');
    }

    /**
     * Converts '0' => 0, 'A' => 10, etc.
     *
     * @param c ASCII character value.
     * @return Numeric value of hexadecimal character (0-15).
     * @throws IllegalArgumentException if <code>c</code> is not a
     *         hexadecimal digit.
     */
    private byte asciiCharToNumericValue(byte c) {
        if (c >= '0' && c <= '9') {
            return (byte) (c - '0');
        } else if (c >= 'A' && c <= 'F') {
            return (byte) (0xA + (c - 'A'));
        } else if (c >= 'a' && c <= 'f') {
            return (byte) (0xA + (c - 'a'));
        } else {
            /*
             * This should never happen since all calls to this method
             * are preceded by a check that c is in [0-9A-Fa-f]
             */
            throw new IllegalArgumentException((char) c
                    + " is not a hexadecimal digit");
        }
    }

}