• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ====================================================================
2  * Copyright (c) 2006 J.T. Beetstra
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  * ====================================================================
23  */
24 
25 package com.beetstra.jutf7;
26 
27 import java.nio.ByteBuffer;
28 import java.nio.CharBuffer;
29 import java.nio.charset.CharsetEncoder;
30 import java.nio.charset.CoderResult;
31 
32 /**
33  * <p>
34  * The CharsetEncoder used to encode both variants of the UTF-7 charset and the
35  * modified-UTF-7 charset.
36  * </p>
37  * <p>
38  * <strong>Please note this class does not behave strictly according to the
39  * specification in Sun Java VMs before 1.6.</strong> This is done to get around
40  * a bug in the implementation of
41  * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)}. Unfortunately,
42  * that method cannot be overridden.
43  * </p>
44  *
45  * @see <a href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">JDK
46  *      bug 6221056< /a>
47  * @author Jaap Beetstra
48  */
49 class UTF7StyleCharsetEncoder extends CharsetEncoder {
50     private static final float AVG_BYTES_PER_CHAR = 1.5f;
51     private static final float MAX_BYTES_PER_CHAR = 5.0f;
52     private final UTF7StyleCharset cs;
53     private final Base64Util base64;
54     private final byte shift;
55     private final byte unshift;
56     private final boolean strict;
57     private boolean base64mode;
58     private int bitsToOutput;
59     private int sextet;
60     static boolean useUglyHackToForceCallToFlushInJava5;
61     static {
62         String version = System.getProperty("java.specification.version");
63         String vendor = System.getProperty("java.vm.vendor");
64         useUglyHackToForceCallToFlushInJava5 = "1.4".equals(version) || "1.5".equals(version);
65         useUglyHackToForceCallToFlushInJava5 &= "Sun Microsystems Inc.".equals(vendor);
66     }
67 
UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict)68     UTF7StyleCharsetEncoder(UTF7StyleCharset cs, Base64Util base64, boolean strict) {
69         super(cs, AVG_BYTES_PER_CHAR, MAX_BYTES_PER_CHAR);
70         this.cs = cs;
71         this.base64 = base64;
72         this.strict = strict;
73         this.shift = cs.shift();
74         this.unshift = cs.unshift();
75     }
76 
77     /*
78      * (non-Javadoc)
79      * @see java.nio.charset.CharsetEncoder#implReset()
80      */
implReset()81     protected void implReset() {
82         base64mode = false;
83         sextet = 0;
84         bitsToOutput = 0;
85     }
86 
87     /**
88      * {@inheritDoc}
89      * <p>
90      * Note that this method might return <code>CoderResult.OVERFLOW</code> (as
91      * is required by the specification) if insufficient space is available in
92      * the output buffer. However, calling it again on JDKs before Java 6
93      * triggers a bug in
94      * {@link java.nio.charset.CharsetEncoder#flush(ByteBuffer)} causing it to
95      * throw an IllegalStateException (the buggy method is <code>final</code>,
96      * thus cannot be overridden).
97      * </p>
98      *
99      * @see <a
100      *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6227608">
101      *      JDK bug 6227608< /a>
102      * @param out The output byte buffer
103      * @return A coder-result object describing the reason for termination
104      */
implFlush(ByteBuffer out)105     protected CoderResult implFlush(ByteBuffer out) {
106         if (base64mode) {
107             if (out.remaining() < 2)
108                 return CoderResult.OVERFLOW;
109             if (bitsToOutput != 0)
110                 out.put(base64.getChar(sextet));
111             out.put(unshift);
112         }
113         return CoderResult.UNDERFLOW;
114     }
115 
116     /**
117      * {@inheritDoc}
118      * <p>
119      * Note that this method might return <code>CoderResult.OVERFLOW</code>,
120      * even though there is sufficient space available in the output buffer.
121      * This is done to force the broken implementation of
122      * {@link java.nio.charset.CharsetEncoder#encode(CharBuffer)} to call flush
123      * (the buggy method is <code>final</code>, thus cannot be overridden).
124      * </p>
125      * <p>
126      * However, String.getBytes() fails if CoderResult.OVERFLOW is returned,
127      * since this assumes it always allocates sufficient bytes (maxBytesPerChar
128      * * nr_of_chars). Thus, as an extra check, the size of the input buffer is
129      * compared against the size of the output buffer. A static variable is used
130      * to indicate if a broken java version is used.
131      * </p>
132      * <p>
133      * It is not possible to directly write the last few bytes, since more bytes
134      * might be waiting to be encoded then those available in the input buffer.
135      * </p>
136      *
137      * @see <a
138      *      href="http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6221056">
139      *      JDK bug 6221056< /a>
140      * @param in The input character buffer
141      * @param out The output byte buffer
142      * @return A coder-result object describing the reason for termination
143      */
encodeLoop(CharBuffer in, ByteBuffer out)144     protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
145         while (in.hasRemaining()) {
146             if (out.remaining() < 4)
147                 return CoderResult.OVERFLOW;
148             char ch = in.get();
149             if (cs.canEncodeDirectly(ch)) {
150                 unshift(out, ch);
151                 out.put((byte)ch);
152             } else if (!base64mode && ch == shift) {
153                 out.put(shift);
154                 out.put(unshift);
155             } else
156                 encodeBase64(ch, out);
157         }
158         /*
159          * <HACK type="ugly"> These lines are required to trick JDK 1.5 and
160          * earlier into flushing when using Charset.encode(String),
161          * Charset.encode(CharBuffer) or CharsetEncoder.encode(CharBuffer)
162          * Without them, the last few bytes may be missing.
163          */
164         if (base64mode && useUglyHackToForceCallToFlushInJava5
165                 && out.limit() != MAX_BYTES_PER_CHAR * in.limit())
166             return CoderResult.OVERFLOW;
167         /* </HACK> */
168         return CoderResult.UNDERFLOW;
169     }
170 
171     /**
172      * <p>
173      * Writes the bytes necessary to leave <i>base 64 mode</i>. This might
174      * include an unshift character.
175      * </p>
176      *
177      * @param out
178      * @param ch
179      */
unshift(ByteBuffer out, char ch)180     private void unshift(ByteBuffer out, char ch) {
181         if (!base64mode)
182             return;
183         if (bitsToOutput != 0)
184             out.put(base64.getChar(sextet));
185         if (base64.contains(ch) || ch == unshift || strict)
186             out.put(unshift);
187         base64mode = false;
188         sextet = 0;
189         bitsToOutput = 0;
190     }
191 
192     /**
193      * <p>
194      * Writes the bytes necessary to encode a character in <i>base 64 mode</i>.
195      * All bytes which are fully determined will be written. The fields
196      * <code>bitsToOutput</code> and <code>sextet</code> are used to remember
197      * the bytes not yet fully determined.
198      * </p>
199      *
200      * @param out
201      * @param ch
202      */
encodeBase64(char ch, ByteBuffer out)203     private void encodeBase64(char ch, ByteBuffer out) {
204         if (!base64mode)
205             out.put(shift);
206         base64mode = true;
207         bitsToOutput += 16;
208         while (bitsToOutput >= 6) {
209             bitsToOutput -= 6;
210             sextet += (ch >> bitsToOutput);
211             sextet &= 0x3F;
212             out.put(base64.getChar(sextet));
213             sextet = 0;
214         }
215         sextet = (ch << (6 - bitsToOutput)) & 0x3F;
216     }
217 }
218