1 /* ==================================================================== 2 * Copyright (c) 2006 J.T. Beetstra 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * ==================================================================== 23 */ 24 25 package com.beetstra.jutf7; 26 27 import java.nio.charset.Charset; 28 import java.nio.charset.CharsetDecoder; 29 import java.nio.charset.CharsetEncoder; 30 import java.util.Arrays; 31 import java.util.List; 32 33 /** 34 * <p> 35 * Abstract base class for UTF-7 style encoding and decoding. 36 * </p> 37 * 38 * @author Jaap Beetstra 39 */ 40 abstract class UTF7StyleCharset extends Charset { 41 private static final List CONTAINED = Arrays.asList(new String[] { 42 "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE" 43 }); 44 final boolean strict; 45 Base64Util base64; 46 47 /** 48 * <p> 49 * Besides the name and aliases, two additional parameters are required. 50 * First the base 64 alphabet used; in modified UTF-7 a slightly different 51 * alphabet is used. Additionally, it should be specified if encoders and 52 * decoders should be strict about the interpretation of malformed encoded 53 * sequences. This is used since modified UTF-7 specifically disallows some 54 * constructs which are allowed (or not specifically disallowed) in UTF-7 55 * (RFC 2152). 56 * </p> 57 * 58 * @param canonicalName The name as defined in java.nio.charset.Charset 59 * @param aliases The aliases as defined in java.nio.charset.Charset 60 * @param alphabet The base 64 alphabet used 61 * @param strict True if strict handling of sequences is requested 62 */ UTF7StyleCharset(String canonicalName, String[] aliases, String alphabet, boolean strict)63 protected UTF7StyleCharset(String canonicalName, String[] aliases, String alphabet, 64 boolean strict) { 65 super(canonicalName, aliases); 66 this.base64 = new Base64Util(alphabet); 67 this.strict = strict; 68 } 69 70 /* 71 * (non-Javadoc) 72 * @see java.nio.charset.Charset#contains(java.nio.charset.Charset) 73 */ contains(final Charset cs)74 public boolean contains(final Charset cs) { 75 return CONTAINED.contains(cs.name()); 76 } 77 78 /* 79 * (non-Javadoc) 80 * @see java.nio.charset.Charset#newDecoder() 81 */ newDecoder()82 public CharsetDecoder newDecoder() { 83 return new UTF7StyleCharsetDecoder(this, base64, strict); 84 } 85 86 /* 87 * (non-Javadoc) 88 * @see java.nio.charset.Charset#newEncoder() 89 */ newEncoder()90 public CharsetEncoder newEncoder() { 91 return new UTF7StyleCharsetEncoder(this, base64, strict); 92 } 93 94 /** 95 * Tells if a character can be encoded using simple (US-ASCII) encoding or 96 * requires base 64 encoding. 97 * 98 * @param ch The character 99 * @return True if the character can be encoded directly, false otherwise 100 */ canEncodeDirectly(char ch)101 abstract boolean canEncodeDirectly(char ch); 102 103 /** 104 * Returns character used to switch to base 64 encoding. 105 * 106 * @return The shift character 107 */ shift()108 abstract byte shift(); 109 110 /** 111 * Returns character used to switch from base 64 encoding to simple 112 * encoding. 113 * 114 * @return The unshift character 115 */ unshift()116 abstract byte unshift(); 117 } 118