/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io;

import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;

/**
 * Byte Order Mark (BOM) representation. See {@link org.apache.commons.io.input.BOMInputStream}.
 * <p>
 * We define the following BOM constants:
 * </p>
 * <ul>
 * <li>{@link #UTF_16BE}</li>
 * <li>{@link #UTF_16LE}</li>
 * <li>{@link #UTF_32BE}</li>
 * <li>{@link #UTF_32LE}</li>
 * <li>{@link #UTF_8}</li>
 * </ul>
 * <p>
 * Instances are immutable: the constructor copies the supplied bytes and {@link #getBytes()} returns a fresh copy.
 * Equality and hashing are based on the BOM bytes only; the charset name does not take part in either.
 * </p>
 * <h2>Deprecating Serialization</h2>
 * <p>
 * <em>Serialization is deprecated and will be removed in 3.0.</em>
 * </p>
 *
 * @see org.apache.commons.io.input.BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * UTF-8 BOM.
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xEF 0xBB 0xBF
     * </pre>
     */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark(StandardCharsets.UTF_8.name(), 0xEF, 0xBB, 0xBF);

    /**
     * UTF-16BE BOM (Big-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFE 0xFF
     * </pre>
     */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark(StandardCharsets.UTF_16BE.name(), 0xFE, 0xFF);

    /**
     * UTF-16LE BOM (Little-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFF 0xFE
     * </pre>
     */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark(StandardCharsets.UTF_16LE.name(), 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0x00 0x00 0xFE 0xFF
     * </pre>
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian).
     * <p>
     * This BOM is:
     * </p>
     * <pre>
     * 0xFF 0xFE 0x00 0x00
     * </pre>
     *
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     *
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /**
     * Charset name.
     */
    private final String charsetName;

    /**
     * Bytes, one BOM byte per {@code int} element.
     */
    private final int[] bytes;

    /**
     * Constructs a new instance.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws NullPointerException if the charsetName or the bytes are {@code null}
     * @throws IllegalArgumentException if the charsetName is zero length
     * @throws IllegalArgumentException if the bytes are zero length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        Objects.requireNonNull(charsetName, "charsetName");
        Objects.requireNonNull(bytes, "bytes");
        if (charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this instance.
        this.bytes = bytes.clone();
    }

    /**
     * Indicates if this instance's bytes equals another.
     * <p>
     * Only the BOM bytes are compared; the charset name is ignored.
     * </p>
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        return Arrays.equals(bytes, ((ByteOrderMark) obj).bytes);
    }

    /**
     * Gets the byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Gets a copy of the BOM's bytes.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            // Narrowing cast keeps the low 8 bits, so 0xEF becomes (byte) 0xEF.
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Gets the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Computes the hash code for this BOM.
     * <p>
     * The hash is derived from the BOM bytes only, which are exactly what {@link #equals(Object)} compares, so any
     * two equal instances (including instances of subclasses) produce the same hash code.
     * </p>
     *
     * @return the hash code for this BOM.
     * @see Object#hashCode()
     */
    @Override
    public int hashCode() {
        return Arrays.hashCode(bytes);
    }

    /**
     * Gets the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * Converts this instance to a String representation of the BOM.
     * <p>
     * The format is the simple class name followed by the charset name and the bytes in upper-case hexadecimal, for
     * example {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}.
     * </p>
     *
     * @return a String representation of this BOM
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Mask to a single byte so out-of-range or negative ints print as two hex digits at most.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}