1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.email.mail.internet; 18 19 import com.android.email.Email; 20 import com.android.email.mail.Body; 21 import com.android.email.mail.BodyPart; 22 import com.android.email.mail.Message; 23 import com.android.email.mail.MessagingException; 24 import com.android.email.mail.Multipart; 25 import com.android.email.mail.Part; 26 27 import org.apache.commons.io.IOUtils; 28 import org.apache.james.mime4j.codec.EncoderUtil; 29 import org.apache.james.mime4j.decoder.Base64InputStream; 30 import org.apache.james.mime4j.decoder.DecoderUtil; 31 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; 32 import org.apache.james.mime4j.util.CharsetUtil; 33 34 import android.util.Log; 35 36 import java.io.ByteArrayOutputStream; 37 import java.io.IOException; 38 import java.io.InputStream; 39 import java.io.OutputStream; 40 import java.util.ArrayList; 41 import java.util.regex.Matcher; 42 import java.util.regex.Pattern; 43 44 public class MimeUtility { 45 46 private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n"); 47 48 /** 49 * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string 50 * object whenever possible. 51 */ unfold(String s)52 public static String unfold(String s) { 53 if (s == null) { 54 return null; 55 } 56 Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s); 57 if (patternMatcher.find()) { 58 patternMatcher.reset(); 59 s = patternMatcher.replaceAll(""); 60 } 61 return s; 62 } 63 decode(String s)64 public static String decode(String s) { 65 if (s == null) { 66 return null; 67 } 68 return DecoderUtil.decodeEncodedWords(s); 69 } 70 unfoldAndDecode(String s)71 public static String unfoldAndDecode(String s) { 72 return decode(unfold(s)); 73 } 74 75 // TODO implement proper foldAndEncode 76 // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent 77 // duplication of encoding. foldAndEncode(String s)78 public static String foldAndEncode(String s) { 79 return s; 80 } 81 82 /** 83 * INTERIM version of foldAndEncode that will be used only by Subject: headers. 84 * This is safer than implementing foldAndEncode() (see above) and risking unknown damage 85 * to other headers. 86 * 87 * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK. 88 * 89 * @param s original string to encode and fold 90 * @param usedCharacters number of characters already used up by header name 91 92 * @return the String ready to be transmitted 93 */ foldAndEncode2(String s, int usedCharacters)94 public static String foldAndEncode2(String s, int usedCharacters) { 95 // james.mime4j.codec.EncoderUtil.java 96 // encode: encodeIfNecessary(text, usage, numUsedInHeaderName) 97 // Usage.TEXT_TOKENlooks like the right thing for subjects 98 // use WORD_ENTITY for address/names 99 100 String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, 101 usedCharacters); 102 103 return fold(encoded, usedCharacters); 104 } 105 106 /** 107 * INTERIM: From newer version of org.apache.james (but we don't want to import 108 * the entire MimeUtil class). 109 * 110 * Splits the specified string into a multiple-line representation with 111 * lines no longer than 76 characters (because the line might contain 112 * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 113 * 2047</a> section 2). If the string contains non-whitespace sequences 114 * longer than 76 characters a line break is inserted at the whitespace 115 * character following the sequence resulting in a line longer than 76 116 * characters. 117 * 118 * @param s 119 * string to split. 120 * @param usedCharacters 121 * number of characters already used up. Usually the number of 122 * characters for header field name plus colon and one space. 123 * @return a multiple-line representation of the given string. 124 */ fold(String s, int usedCharacters)125 public static String fold(String s, int usedCharacters) { 126 final int maxCharacters = 76; 127 128 final int length = s.length(); 129 if (usedCharacters + length <= maxCharacters) 130 return s; 131 132 StringBuilder sb = new StringBuilder(); 133 134 int lastLineBreak = -usedCharacters; 135 int wspIdx = indexOfWsp(s, 0); 136 while (true) { 137 if (wspIdx == length) { 138 sb.append(s.substring(Math.max(0, lastLineBreak))); 139 return sb.toString(); 140 } 141 142 int nextWspIdx = indexOfWsp(s, wspIdx + 1); 143 144 if (nextWspIdx - lastLineBreak > maxCharacters) { 145 sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); 146 sb.append("\r\n"); 147 lastLineBreak = wspIdx; 148 } 149 150 wspIdx = nextWspIdx; 151 } 152 } 153 154 /** 155 * INTERIM: From newer version of org.apache.james (but we don't want to import 156 * the entire MimeUtil class). 157 * 158 * Search for whitespace. 159 */ indexOfWsp(String s, int fromIndex)160 private static int indexOfWsp(String s, int fromIndex) { 161 final int len = s.length(); 162 for (int index = fromIndex; index < len; index++) { 163 char c = s.charAt(index); 164 if (c == ' ' || c == '\t') 165 return index; 166 } 167 return len; 168 } 169 170 /** 171 * Returns the named parameter of a header field. If name is null the first 172 * parameter is returned, or if there are no additional parameters in the 173 * field the entire field is returned. Otherwise the named parameter is 174 * searched for in a case insensitive fashion and returned. If the parameter 175 * cannot be found the method returns null. 176 * 177 * TODO: quite inefficient with the inner trimming & splitting. 178 * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive. 179 * TODO: The doc says that for a null name you get the first param, but you get the header. 180 * Should probably just fix the doc, but if other code assumes that behavior, fix the code. 181 * 182 * @param header 183 * @param name 184 * @return 185 */ getHeaderParameter(String header, String name)186 public static String getHeaderParameter(String header, String name) { 187 if (header == null) { 188 return null; 189 } 190 String[] parts = unfold(header).split(";"); 191 if (name == null) { 192 return parts[0]; 193 } 194 String lowerCaseName = name.toLowerCase(); 195 for (String part : parts) { 196 if (part.trim().toLowerCase().startsWith(lowerCaseName)) { 197 String parameter = part.split("=", 2)[1].trim(); 198 if (parameter.startsWith("\"") && parameter.endsWith("\"")) { 199 return parameter.substring(1, parameter.length() - 1); 200 } 201 else { 202 return parameter; 203 } 204 } 205 } 206 return null; 207 } 208 findFirstPartByMimeType(Part part, String mimeType)209 public static Part findFirstPartByMimeType(Part part, String mimeType) 210 throws MessagingException { 211 if (part.getBody() instanceof Multipart) { 212 Multipart multipart = (Multipart)part.getBody(); 213 for (int i = 0, count = multipart.getCount(); i < count; i++) { 214 BodyPart bodyPart = multipart.getBodyPart(i); 215 Part ret = findFirstPartByMimeType(bodyPart, mimeType); 216 if (ret != null) { 217 return ret; 218 } 219 } 220 } 221 else if (part.getMimeType().equalsIgnoreCase(mimeType)) { 222 return part; 223 } 224 return null; 225 } 226 findPartByContentId(Part part, String contentId)227 public static Part findPartByContentId(Part part, String contentId) throws Exception { 228 if (part.getBody() instanceof Multipart) { 229 Multipart multipart = (Multipart)part.getBody(); 230 for (int i = 0, count = multipart.getCount(); i < count; i++) { 231 BodyPart bodyPart = multipart.getBodyPart(i); 232 Part ret = findPartByContentId(bodyPart, contentId); 233 if (ret != null) { 234 return ret; 235 } 236 } 237 } 238 String cid = part.getContentId(); 239 if (contentId.equals(cid)) { 240 return part; 241 } 242 return null; 243 } 244 245 /** 246 * Reads the Part's body and returns a String based on any charset conversion that needed 247 * to be done. 248 * @param part The part containing a body 249 * @return a String containing the converted text in the body, or null if there was no text 250 * or an error during conversion. 251 */ getTextFromPart(Part part)252 public static String getTextFromPart(Part part) { 253 try { 254 if (part != null && part.getBody() != null) { 255 InputStream in = part.getBody().getInputStream(); 256 String mimeType = part.getMimeType(); 257 if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) { 258 /* 259 * Now we read the part into a buffer for further processing. Because 260 * the stream is now wrapped we'll remove any transfer encoding at this point. 261 */ 262 ByteArrayOutputStream out = new ByteArrayOutputStream(); 263 IOUtils.copy(in, out); 264 in.close(); 265 in = null; // we want all of our memory back, and close might not release 266 267 /* 268 * We've got a text part, so let's see if it needs to be processed further. 269 */ 270 String charset = getHeaderParameter(part.getContentType(), "charset"); 271 if (charset != null) { 272 /* 273 * See if there is conversion from the MIME charset to the Java one. 274 */ 275 charset = CharsetUtil.toJavaCharset(charset); 276 } 277 /* 278 * No encoding, so use us-ascii, which is the standard. 279 */ 280 if (charset == null) { 281 charset = "ASCII"; 282 } 283 /* 284 * Convert and return as new String 285 */ 286 String result = out.toString(charset); 287 out.close(); 288 return result; 289 } 290 } 291 292 } 293 catch (OutOfMemoryError oom) { 294 /* 295 * If we are not able to process the body there's nothing we can do about it. Return 296 * null and let the upper layers handle the missing content. 297 */ 298 Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + oom.toString()); 299 } 300 catch (Exception e) { 301 /* 302 * If we are not able to process the body there's nothing we can do about it. Return 303 * null and let the upper layers handle the missing content. 304 */ 305 Log.e(Email.LOG_TAG, "Unable to getTextFromPart " + e.toString()); 306 } 307 return null; 308 } 309 310 /** 311 * Returns true if the given mimeType matches the matchAgainst specification. The comparison 312 * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*"). 313 * 314 * @param mimeType A MIME type to check. 315 * @param matchAgainst A MIME type to check against. May include wildcards. 316 * @return true if the mimeType matches 317 */ mimeTypeMatches(String mimeType, String matchAgainst)318 public static boolean mimeTypeMatches(String mimeType, String matchAgainst) { 319 Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), 320 Pattern.CASE_INSENSITIVE); 321 return p.matcher(mimeType).matches(); 322 } 323 324 /** 325 * Returns true if the given mimeType matches any of the matchAgainst specifications. The 326 * comparison ignores case and the matchAgainst strings may include "*" for a wildcard 327 * (e.g. "image/*"). 328 * 329 * @param mimeType A MIME type to check. 330 * @param matchAgainst An array of MIME types to check against. May include wildcards. 331 * @return true if the mimeType matches any of the matchAgainst strings 332 */ mimeTypeMatches(String mimeType, String[] matchAgainst)333 public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) { 334 for (String matchType : matchAgainst) { 335 if (mimeTypeMatches(mimeType, matchType)) { 336 return true; 337 } 338 } 339 return false; 340 } 341 342 /** 343 * Removes any content transfer encoding from the stream and returns a Body. 344 */ decodeBody(InputStream in, String contentTransferEncoding)345 public static Body decodeBody(InputStream in, String contentTransferEncoding) 346 throws IOException { 347 /* 348 * We'll remove any transfer encoding by wrapping the stream. 349 */ 350 if (contentTransferEncoding != null) { 351 contentTransferEncoding = 352 MimeUtility.getHeaderParameter(contentTransferEncoding, null); 353 if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) { 354 in = new QuotedPrintableInputStream(in); 355 } 356 else if ("base64".equalsIgnoreCase(contentTransferEncoding)) { 357 in = new Base64InputStream(in); 358 } 359 } 360 361 BinaryTempFileBody tempBody = new BinaryTempFileBody(); 362 OutputStream out = tempBody.getOutputStream(); 363 IOUtils.copy(in, out); 364 out.close(); 365 return tempBody; 366 } 367 368 /** 369 * An unfortunately named method that makes decisions about a Part (usually a Message) 370 * as to which of it's children will be "viewable" and which will be attachments. 371 * The method recursively sorts the viewables and attachments into seperate 372 * lists for further processing. 373 * @param part 374 * @param viewables 375 * @param attachments 376 * @throws MessagingException 377 */ collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)378 public static void collectParts(Part part, ArrayList<Part> viewables, 379 ArrayList<Part> attachments) throws MessagingException { 380 String disposition = part.getDisposition(); 381 String dispositionType = null; 382 String dispositionFilename = null; 383 if (disposition != null) { 384 dispositionType = MimeUtility.getHeaderParameter(disposition, null); 385 dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename"); 386 } 387 388 /* 389 * A best guess that this part is intended to be an attachment and not inline. 390 */ 391 boolean attachment = ("attachment".equalsIgnoreCase(dispositionType)) 392 || (dispositionFilename != null) 393 && (!"inline".equalsIgnoreCase(dispositionType)); 394 395 /* 396 * If the part is Multipart but not alternative it's either mixed or 397 * something we don't know about, which means we treat it as mixed 398 * per the spec. We just process it's pieces recursively. 399 */ 400 if (part.getBody() instanceof Multipart) { 401 Multipart mp = (Multipart)part.getBody(); 402 for (int i = 0; i < mp.getCount(); i++) { 403 collectParts(mp.getBodyPart(i), viewables, attachments); 404 } 405 } 406 /* 407 * If the part is an embedded message we just continue to process 408 * it, pulling any viewables or attachments into the running list. 409 */ 410 else if (part.getBody() instanceof Message) { 411 Message message = (Message)part.getBody(); 412 collectParts(message, viewables, attachments); 413 } 414 /* 415 * If the part is HTML and it got this far it's part of a mixed (et 416 * al) and should be rendered inline. 417 */ 418 else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) { 419 viewables.add(part); 420 } 421 /* 422 * If the part is plain text and it got this far it's part of a 423 * mixed (et al) and should be rendered inline. 424 */ 425 else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) { 426 viewables.add(part); 427 } 428 /* 429 * Finally, if it's nothing else we will include it as an attachment. 430 */ 431 else { 432 attachments.add(part); 433 } 434 } 435 } 436