/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.emailcommon.internet;

import android.util.Base64;
import android.util.Base64DataException;
import android.util.Base64InputStream;
import android.util.Log;

import com.android.emailcommon.Logging;
import com.android.emailcommon.mail.Body;
import com.android.emailcommon.mail.BodyPart;
import com.android.emailcommon.mail.Message;
import com.android.emailcommon.mail.MessagingException;
import com.android.emailcommon.mail.Multipart;
import com.android.emailcommon.mail.Part;

import org.apache.commons.io.IOUtils;
import org.apache.james.mime4j.codec.EncoderUtil;
import org.apache.james.mime4j.decoder.DecoderUtil;
import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
import org.apache.james.mime4j.util.CharsetUtil;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for working with MIME headers, MIME types, transfer encodings and
 * message part trees.
 */
public class MimeUtility {

    public static final String MIME_TYPE_RFC822 = "message/rfc822";
    private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");

    /**
     * Unfolds a header value by removing every CR and LF character, so that a folded
     * CRLF+WSP sequence collapses to the WSP alone. The original string object is
     * returned unchanged when it contains no CR or LF.
     *
     * @param s the header value, may be null
     * @return the unfolded value, or null if {@code s} is null
     */
    public static String unfold(String s) {
        if (s == null) {
            return null;
        }
        Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
        if (patternMatcher.find()) {
            // Only allocate a new string when there is actually something to strip.
            patternMatcher.reset();
            s = patternMatcher.replaceAll("");
        }
        return s;
    }

    /**
     * Decodes any encoded words in the given string using
     * {@link DecoderUtil#decodeEncodedWords(String)}.
     *
     * @param s the string to decode, may be null
     * @return the decoded string, or null if {@code s} is null
     */
    public static String decode(String s) {
        if (s == null) {
            return null;
        }
        return DecoderUtil.decodeEncodedWords(s);
    }

    /**
     * Convenience for {@code decode(unfold(s))}.
     *
     * @param s the raw header value, may be null
     * @return the unfolded and decoded value, or null if {@code s} is null
     */
    public static String unfoldAndDecode(String s) {
        return decode(unfold(s));
    }

    // TODO implement proper foldAndEncode
    // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
    // duplication of encoding.
    /**
     * Placeholder: currently returns the input unchanged (no folding, no encoding).
     *
     * @param s the string to fold and encode
     * @return {@code s}, unmodified
     */
    public static String foldAndEncode(String s) {
        return s;
    }

    /**
     * INTERIM version of foldAndEncode that will be used only by Subject: headers.
     * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
     * to other headers.
     *
     * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
     *
     * @param s original string to encode and fold
     * @param usedCharacters number of characters already used up by header name
     * @return the String ready to be transmitted
     */
    public static String foldAndEncode2(String s, int usedCharacters) {
        // james.mime4j.codec.EncoderUtil.java
        // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
        // Usage.TEXT_TOKEN looks like the right thing for subjects
        // use WORD_ENTITY for address/names

        String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
                usedCharacters);

        return fold(encoded, usedCharacters);
    }

    /**
     * INTERIM: From newer version of org.apache.james (but we don't want to import
     * the entire MimeUtil class).
     *
     * Splits the specified string into a multiple-line representation with
     * lines no longer than 76 characters (because the line might contain
     * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
     * 2047</a> section 2). If the string contains non-whitespace sequences
     * longer than 76 characters a line break is inserted at the whitespace
     * character following the sequence resulting in a line longer than 76
     * characters.
     *
     * @param s
     *            string to split.
     * @param usedCharacters
     *            number of characters already used up. Usually the number of
     *            characters for header field name plus colon and one space.
     * @return a multiple-line representation of the given string.
     */
    public static String fold(String s, int usedCharacters) {
        final int maxCharacters = 76;

        final int length = s.length();
        if (usedCharacters + length <= maxCharacters) {
            return s;
        }

        StringBuilder sb = new StringBuilder();

        // Negative start accounts for the characters already consumed on the first line.
        int lastLineBreak = -usedCharacters;
        int wspIdx = indexOfWsp(s, 0);
        while (true) {
            if (wspIdx == length) {
                // No more whitespace: emit the remainder as the final line.
                sb.append(s.substring(Math.max(0, lastLineBreak)));
                return sb.toString();
            }

            int nextWspIdx = indexOfWsp(s, wspIdx + 1);

            if (nextWspIdx - lastLineBreak > maxCharacters) {
                // Including the next word would overflow the line, so break at this whitespace.
                sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
                sb.append("\r\n");
                lastLineBreak = wspIdx;
            }

            wspIdx = nextWspIdx;
        }
    }

    /**
     * INTERIM: From newer version of org.apache.james (but we don't want to import
     * the entire MimeUtil class).
     *
     * Search for whitespace (space or tab).
     *
     * @param s the string to search
     * @param fromIndex index at which to start searching
     * @return the index of the first space or tab at or after {@code fromIndex}, or
     *         {@code s.length()} if there is none
     */
    private static int indexOfWsp(String s, int fromIndex) {
        final int len = s.length();
        for (int index = fromIndex; index < len; index++) {
            char c = s.charAt(index);
            if (c == ' ' || c == '\t') {
                return index;
            }
        }
        return len;
    }

    /**
     * Returns the named parameter of a header field. If name is null, the leading
     * value of the header (everything before the first ';', trimmed) is returned.
     * Otherwise the ';'-separated parts are searched for one whose name (the text
     * before '=') equals {@code name}, ignoring case, and its value is returned with
     * one pair of surrounding double quotes removed. If the parameter cannot be found,
     * or the matching part has no '=', the method returns null.
     *
     * TODO: quite inefficient with the inner trimming & splitting.
     * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
     *       ('+' -> ' ' conversion too? check RFC)
     *
     * @param header the raw header value, may be null
     * @param name the parameter name to look up, or null for the leading header value
     * @return the leading header value (if name=null), the found parameter, or null
     */
    public static String getHeaderParameter(String header, String name) {
        if (header == null) {
            return null;
        }
        String[] parts = unfold(header).split(";");
        if (name == null) {
            // split() yields an empty array for input made only of separators (e.g. ";").
            return parts.length == 0 ? "" : parts[0].trim();
        }
        for (String part : parts) {
            String[] parameterParts = part.split("=", 2);
            // Compare the whole parameter name so that e.g. "name" does not match
            // "namespace=..."; equalsIgnoreCase is locale-independent.
            if (!parameterParts[0].trim().equalsIgnoreCase(name)) {
                continue;
            }
            if (parameterParts.length < 2) {
                return null;
            }
            String parameter = parameterParts[1].trim();
            // Require two characters so that a lone '"' is not treated as a quoted pair.
            if (parameter.length() >= 2
                    && parameter.startsWith("\"") && parameter.endsWith("\"")) {
                return parameter.substring(1, parameter.length() - 1);
            }
            return parameter;
        }
        return null;
    }

    /**
     * Depth-first search of a part tree for the first non-multipart part whose MIME type
     * equals {@code mimeType}, ignoring case.
     *
     * @param part the root of the tree to search
     * @param mimeType the MIME type to look for
     * @return the first matching part, or null if none matches
     * @throws MessagingException if reading the part structure fails
     */
    public static Part findFirstPartByMimeType(Part part, String mimeType)
            throws MessagingException {
        if (part.getBody() instanceof Multipart) {
            Multipart multipart = (Multipart) part.getBody();
            for (int i = 0, count = multipart.getCount(); i < count; i++) {
                BodyPart bodyPart = multipart.getBodyPart(i);
                Part ret = findFirstPartByMimeType(bodyPart, mimeType);
                if (ret != null) {
                    return ret;
                }
            }
        } else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
            return part;
        }
        return null;
    }

    /**
     * Depth-first search of a part tree for a part whose content id equals
     * {@code contentId}. Children of a multipart are examined before the multipart itself.
     *
     * @param part the root of the tree to search
     * @param contentId the content id to look for; must not be null
     * @return the matching part, or null if none matches
     * @throws Exception if reading the part structure fails
     */
    public static Part findPartByContentId(Part part, String contentId) throws Exception {
        if (part.getBody() instanceof Multipart) {
            Multipart multipart = (Multipart) part.getBody();
            for (int i = 0, count = multipart.getCount(); i < count; i++) {
                BodyPart bodyPart = multipart.getBodyPart(i);
                Part ret = findPartByContentId(bodyPart, contentId);
                if (ret != null) {
                    return ret;
                }
            }
        }
        String cid = part.getContentId();
        if (contentId.equals(cid)) {
            return part;
        }
        return null;
    }

    /**
     * Reads the Part's body and returns a String based on any charset conversion that needed
     * to be done. Only parts whose MIME type matches "text/*" are read.
     *
     * @param part The part containing a body
     * @return a String containing the converted text in the body, or null if there was no text
     * or an error during conversion.
     */
    public static String getTextFromPart(Part part) {
        try {
            if (part == null || part.getBody() == null) {
                return null;
            }
            String mimeType = part.getMimeType();
            if (mimeType == null || !MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
                // Not text: do not open the body stream at all, so nothing needs closing.
                return null;
            }

            /*
             * Now we read the part into a buffer for further processing. Because
             * the stream is now wrapped we'll remove any transfer encoding at this point.
             */
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            InputStream in = part.getBody().getInputStream();
            try {
                IOUtils.copy(in, out);
            } finally {
                // Always release the stream, including when the copy fails part-way.
                if (in != null) {
                    in.close();
                }
            }

            /*
             * We've got a text part, so let's see if it needs to be processed further.
             */
            String charset = getHeaderParameter(part.getContentType(), "charset");
            if (charset != null) {
                /*
                 * See if there is conversion from the MIME charset to the Java one.
                 */
                charset = CharsetUtil.toJavaCharset(charset);
            }
            /*
             * No encoding, so use us-ascii, which is the standard.
             */
            if (charset == null) {
                charset = "ASCII";
            }
            /*
             * Convert and return as new String
             */
            return out.toString(charset);
        } catch (OutOfMemoryError oom) {
            /*
             * If we are not able to process the body there's nothing we can do about it. Return
             * null and let the upper layers handle the missing content.
             */
            Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
        } catch (Exception e) {
            /*
             * If we are not able to process the body there's nothing we can do about it. Return
             * null and let the upper layers handle the missing content.
             */
            Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + e.toString());
        }
        return null;
    }

    /**
     * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
     * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
     * Every character other than "*" is matched literally, so types containing characters such
     * as '.' or '+' (e.g. "image/svg+xml") compare as expected.
     *
     * @param mimeType A MIME type to check.
     * @param matchAgainst A MIME type to check against. May include wildcards.
     * @return true if the mimeType matches
     */
    public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
        StringBuilder regex = new StringBuilder();
        int start = 0;
        int star = matchAgainst.indexOf('*', start);
        while (star >= 0) {
            // Quote the literal run before each '*' so regex metacharacters stay literal.
            regex.append(Pattern.quote(matchAgainst.substring(start, star))).append(".*");
            start = star + 1;
            star = matchAgainst.indexOf('*', start);
        }
        regex.append(Pattern.quote(matchAgainst.substring(start)));
        Pattern p = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
        return p.matcher(mimeType).matches();
    }

    /**
     * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
     * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
     * (e.g. "image/*").
     *
     * @param mimeType A MIME type to check.
     * @param matchAgainst An array of MIME types to check against. May include wildcards.
     * @return true if the mimeType matches any of the matchAgainst strings
     */
    public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
        for (String matchType : matchAgainst) {
            if (mimeTypeMatches(mimeType, matchType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Given an input stream and a transfer encoding, return a wrapped input stream for that
     * encoding (or the original if none is required)
     *
     * @param in the input stream
     * @param contentTransferEncoding the content transfer encoding
     * @return a properly wrapped stream
     */
    public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
            String contentTransferEncoding) {
        if (contentTransferEncoding != null) {
            // Strip any trailing ";parameters" so only the encoding token is compared.
            contentTransferEncoding =
                MimeUtility.getHeaderParameter(contentTransferEncoding, null);
            if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
                in = new QuotedPrintableInputStream(in);
            } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
                in = new Base64InputStream(in, Base64.DEFAULT);
            }
        }
        return in;
    }

    /**
     * Removes any content transfer encoding from the stream and returns a Body.
     * The supplied stream is not closed by this method.
     *
     * @param in the raw (possibly transfer-encoded) input stream
     * @param contentTransferEncoding the content transfer encoding, may be null
     * @return a temp-file backed body holding the decoded bytes
     * @throws IOException if copying the stream fails
     */
    public static Body decodeBody(InputStream in, String contentTransferEncoding)
            throws IOException {
        /*
         * We'll remove any transfer encoding by wrapping the stream.
         */
        in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
        BinaryTempFileBody tempBody = new BinaryTempFileBody();
        OutputStream out = tempBody.getOutputStream();
        try {
            IOUtils.copy(in, out);
        } catch (Base64DataException bde) {
            // Deliberately best-effort: whatever was copied before the malformed base64
            // data is kept as the body.
            // TODO Need to fix this somehow
            //String warning = "\n\n" + Email.getMessageDecodeErrorString();
            //out.write(warning.getBytes());
        } finally {
            out.close();
        }
        return tempBody;
    }

    /**
     * Recursively scan a Part (usually a Message) and sort out which of its children will be
     * "viewable" and which will be attachments.
     *
     * @param part The part to be broken down
     * @param viewables This arraylist will be populated with all parts that appear to be
     * the "message" (e.g. text/plain & text/html)
     * @param attachments This arraylist will be populated with all parts that appear to be
     * attachments (including inlines)
     * @throws MessagingException
     */
    public static void collectParts(Part part, ArrayList<Part> viewables,
            ArrayList<Part> attachments) throws MessagingException {
        String disposition = part.getDisposition();
        String dispositionType = null;
        String dispositionFilename = null;
        if (disposition != null) {
            dispositionType = MimeUtility.getHeaderParameter(disposition, null);
            dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
        }
        // An attachment filename can be defined in either the Content-Disposition header
        // or the Content-Type header. Content-Disposition is preferred, so we only try
        // the Content-Type header as a last resort.
        if (dispositionFilename == null) {
            String contentType = part.getContentType();
            dispositionFilename = MimeUtility.getHeaderParameter(contentType, "name");
        }
        boolean attachmentDisposition = "attachment".equalsIgnoreCase(dispositionType);
        // If a disposition is not specified, default to "inline"
        boolean inlineDisposition = dispositionType == null
                || "inline".equalsIgnoreCase(dispositionType);

        // A guess that this part is intended to be an attachment
        boolean attachment = attachmentDisposition
                || (dispositionFilename != null && !inlineDisposition);

        // A guess that this part is intended to be an inline.
        boolean inline = inlineDisposition && (dispositionFilename != null);

        // One or the other
        boolean attachmentOrInline = attachment || inline;

        if (part.getBody() instanceof Multipart) {
            // If the part is Multipart but not alternative it's either mixed or
            // something we don't know about, which means we treat it as mixed
            // per the spec. We just process its pieces recursively.
            Multipart mp = (Multipart) part.getBody();
            boolean foundHtml = false;
            // Only a MimeMultipart exposes its subtype; any other Multipart is treated as
            // mixed. The constant-first equals also tolerates a null subtype.
            if (mp instanceof MimeMultipart
                    && "alternative".equals(((MimeMultipart) mp).getSubTypeForTest())) {
                for (int i = 0; i < mp.getCount(); i++) {
                    BodyPart candidate = mp.getBodyPart(i);
                    if (candidate.isMimeType("text/html")) {
                        foundHtml = true;
                        break;
                    }
                }
            }
            for (int i = 0; i < mp.getCount(); i++) {
                // See if we have text and html
                BodyPart bp = mp.getBodyPart(i);
                // If there's html, don't bother loading text
                if (foundHtml && bp.isMimeType("text/plain")) {
                    continue;
                }
                collectParts(bp, viewables, attachments);
            }
        } else if (part.getBody() instanceof Message) {
            // If the part is an embedded message we just continue to process
            // it, pulling any viewables or attachments into the running list.
            Message message = (Message) part.getBody();
            collectParts(message, viewables, attachments);
        } else if ((!attachmentOrInline) && ("text/html".equalsIgnoreCase(part.getMimeType()))) {
            // If the part is HTML and we got this far, it's a viewable part of a mixed
            viewables.add(part);
        } else if ((!attachmentOrInline) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) {
            // If the part is text and we got this far, it's a viewable part of a mixed
            viewables.add(part);
        } else if (attachmentOrInline) {
            // Finally, if it's an attachment or an inline we will include it as an attachment.
            attachments.add(part);
        }
    }
}