1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.io.IOException; 11 12 /** 13 * Class that converts strings to bytes and back. The bytes do not have to be UTF-8, since they are 14 * meant to only be used serially. In particular, the only restriction is that the transition 15 * between serialized characters must be discoverable by looking at either the last byte of the 16 * first character or the first byte of the second character. 17 * 18 * @author markdavis 19 */ 20 public abstract class StringByteConverter { 21 clear()22 public void clear() { 23 // default implementation does nothing 24 } 25 26 /** 27 * Return the maximum number of bytes per char. 28 * 29 * @return 30 */ getMaxBytesPerChar()31 public abstract int getMaxBytesPerChar(); 32 33 /** 34 * Converts char of source to output. Result may depend on previous context. Call clear() before 35 * first character, and call toBytes(output, bytePosition) after done. 36 * 37 * @param output buffer to fill 38 * @return new byte position 39 */ toBytes(char ch, byte[] output, int bytePosition)40 public abstract int toBytes(char ch, byte[] output, int bytePosition); 41 42 /** 43 * Converts final state, if any, to output. Result may depend on previous context. Call clear() 44 * before first character, and call toBytes(output, bytePosition) after done. 45 * 46 * @param output buffer to fill 47 * @return new byte position 48 */ toBytes(byte[] output, int bytePosition)49 public int toBytes(byte[] output, int bytePosition) { 50 return bytePosition; // default implementation does nothing 51 } 52 53 /** 54 * Read a string from a byte array. The byte array must be well formed; eg the contents from 55 * byteStart to byteLength will not cause errors, and will never overrun. It will always 56 * terminate at byteLength. The results are not guaranteed to be the same as would be gotten 57 * from inverting toBytes -- that will happen if multiple strings map to the same bytes. 58 * 59 * @param input byte array to read from 60 * @param byteStart TODO 61 * @param byteLength total length of the byte array 62 * @param result the result to add on to 63 * @return the result, for chaining. 64 * @throws IOException 65 */ fromBytes( byte[] input, int byteStart, int byteLength, Appendable result)66 public abstract Appendable fromBytes( 67 byte[] input, int byteStart, int byteLength, Appendable result); 68 69 /** 70 * Write a string to a byte array. 71 * 72 * @param source string to write 73 * @param output byte array to write into 74 * @param bytePosition place in byte array to start 75 * @return new position in byte array 76 */ toBytes(CharSequence source, byte[] output, int bytePosition)77 public int toBytes(CharSequence source, byte[] output, int bytePosition) { 78 for (int i = 0; i < source.length(); ++i) { 79 bytePosition = toBytes(source.charAt(i), output, bytePosition); 80 } 81 toBytes(output, bytePosition); // cleanup 82 return bytePosition; 83 } 84 toBytes(CharSequence source)85 public byte[] toBytes(CharSequence source) { 86 byte[] buffer = new byte[source.length() * getMaxBytesPerChar()]; 87 int len = toBytes(source, buffer, 0); 88 byte[] result = new byte[len]; 89 System.arraycopy(buffer, 0, result, 0, len); 90 return result; 91 } 92 } 93