1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2000-2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File writejava.c
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 01/11/02 Ram Creation.
17 *******************************************************************************
18 */
19 #include <stdbool.h>
20 #include "rle.h"
/**
 * The ESCAPE unit used by the 16-bit run-length encoding. In an encoded
 * stream an ESCAPE is followed either by a second ESCAPE (which stands for
 * one literal ESCAPE value) or by a run length and the value to repeat.
 */
static const uint16_t ESCAPE = 0xA5A5;
26
/**
 * The ESCAPE_BYTE used by the byte run-length encoding. In an encoded byte
 * stream an ESCAPE_BYTE is followed either by a second ESCAPE_BYTE (which
 * stands for one literal ESCAPE_BYTE) or by a run-length byte and the byte
 * to repeat.
 */
static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
32
33 /**
34 * Append a byte to the given StringBuffer, packing two bytes into each
35 * character. The state parameter maintains intermediary data between
36 * calls.
37 * @param state A two-element array, with state[0] == 0 if this is the
38 * first byte of a pair, or state[0] != 0 if this is the second byte
39 * of a pair, in which case state[1] is the first byte.
40 */
41 static uint16_t*
appendEncodedByte(uint16_t * buffer,uint16_t * buffLimit,uint8_t value,uint8_t state[],UErrorCode * status)42 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
43 if(!status || U_FAILURE(*status)){
44 return NULL;
45 }
46 if (state[0] != 0) {
47 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
48 if(buffer < buffLimit){
49 *buffer++ = c;
50 }else{
51 *status = U_BUFFER_OVERFLOW_ERROR;
52 }
53 state[0] = 0;
54 return buffer;
55 }
56 else {
57 state[0] = 1;
58 state[1] = value;
59 return buffer;
60 }
61 }
62 /**
63 * Encode a run, possibly a degenerate run (of < 4 values).
64 * @param length The length of the run; must be > 0 && <= 0xFF.
65 */
66 static uint16_t*
encodeRunByte(uint16_t * buffer,uint16_t * bufLimit,uint8_t value,int32_t length,uint8_t state[],UErrorCode * status)67 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
68 if(!status || U_FAILURE(*status)){
69 return NULL;
70 }
71 if (length < 4) {
72 int32_t j=0;
73 for (; j<length; ++j) {
74 if (value == ESCAPE_BYTE) {
75 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
76 }
77 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
78 }
79 }
80 else {
81 if (length == ESCAPE_BYTE) {
82 if (value == ESCAPE_BYTE){
83 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
84 }
85 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
86 --length;
87 }
88 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
89 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
90 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
91 }
92 return buffer;
93 }
94
/**
 * Store value at *buffer and advance buffer when buffer is below bufLimit;
 * otherwise set *status to U_BUFFER_OVERFLOW_ERROR and leave buffer alone.
 * num is incremented in both cases. buffer and num must be modifiable
 * lvalues. Every parameter is parenthesized in the expansion so that
 * expression arguments keep their intended grouping.
 */
#define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \
    if((buffer)<(bufLimit)){ \
        *(buffer)++=(value); \
    }else{ \
        *(status) = U_BUFFER_OVERFLOW_ERROR; \
    } \
    (num)++; \
} UPRV_BLOCK_MACRO_END
103
104 /**
105 * Encode a run, possibly a degenerate run (of < 4 values).
106 * @param length The length of the run; must be > 0 && <= 0xFFFF.
107 */
108 static uint16_t*
encodeRunShort(uint16_t * buffer,uint16_t * bufLimit,uint16_t value,int32_t length,UErrorCode * status)109 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
110 int32_t num=0;
111 if (length < 4) {
112 int j=0;
113 for (; j<length; ++j) {
114 if (value == (int32_t) ESCAPE){
115 APPEND(buffer,bufLimit,ESCAPE, num, status);
116
117 }
118 APPEND(buffer,bufLimit,value,num, status);
119 }
120 }
121 else {
122 if (length == (int32_t) ESCAPE) {
123 if (value == (int32_t) ESCAPE){
124 APPEND(buffer,bufLimit,ESCAPE,num,status);
125
126 }
127 APPEND(buffer,bufLimit,value,num,status);
128 --length;
129 }
130 APPEND(buffer,bufLimit,ESCAPE,num,status);
131 APPEND(buffer,bufLimit,(uint16_t) length, num,status);
132 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
133 }
134 return buffer;
135 }
136
137 /**
138 * Construct a string representing a char array. Use run-length encoding.
139 * A character represents itself, unless it is the ESCAPE character. Then
140 * the following notations are possible:
141 * ESCAPE ESCAPE ESCAPE literal
142 * ESCAPE n c n instances of character c
143 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
144 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
145 * If we encounter a run where n == ESCAPE, we represent this as:
146 * c ESCAPE n-1 c
147 * The ESCAPE value is chosen so as not to collide with commonly
148 * seen values.
149 */
150 int32_t
usArrayToRLEString(const uint16_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)151 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
152 uint16_t* bufLimit = buffer+bufLen;
153 uint16_t* saveBuffer = buffer;
154 if(buffer < bufLimit){
155 *buffer++ = (uint16_t)(srcLen>>16);
156 if(buffer<bufLimit){
157 uint16_t runValue = src[0];
158 int32_t runLength = 1;
159 int i=1;
160 *buffer++ = (uint16_t) srcLen;
161
162 for (; i<srcLen; ++i) {
163 uint16_t s = src[i];
164 if (s == runValue && runLength < 0xFFFF){
165 ++runLength;
166 }else {
167 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
168 runValue = s;
169 runLength = 1;
170 }
171 }
172 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
173 }else{
174 *status = U_BUFFER_OVERFLOW_ERROR;
175 }
176 }else{
177 *status = U_BUFFER_OVERFLOW_ERROR;
178 }
179 return (int32_t)(buffer - saveBuffer);
180 }
181
182 /**
183 * Construct a string representing a byte array. Use run-length encoding.
184 * Two bytes are packed into a single char, with a single extra zero byte at
185 * the end if needed. A byte represents itself, unless it is the
186 * ESCAPE_BYTE. Then the following notations are possible:
187 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
188 * ESCAPE_BYTE n b n instances of byte b
189 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
190 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
191 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
192 * b ESCAPE_BYTE n-1 b
193 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
194 * seen values.
195 */
196 int32_t
byteArrayToRLEString(const uint8_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)197 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
198 const uint16_t* saveBuf = buffer;
199 uint16_t* bufLimit = buffer+bufLen;
200 if(buffer < bufLimit){
201 *buffer++ = ((uint16_t) (srcLen >> 16));
202
203 if(buffer<bufLimit){
204 uint8_t runValue = src[0];
205 int runLength = 1;
206 uint8_t state[2]= {0};
207 int i=1;
208 *buffer++=((uint16_t) srcLen);
209 for (; i<srcLen; ++i) {
210 uint8_t b = src[i];
211 if (b == runValue && runLength < 0xFF){
212 ++runLength;
213 }
214 else {
215 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
216 runValue = b;
217 runLength = 1;
218 }
219 }
220 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
221
222 /* We must save the final byte, if there is one, by padding
223 * an extra zero.
224 */
225 if (state[0] != 0) {
226 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
227 }
228 }else{
229 *status = U_BUFFER_OVERFLOW_ERROR;
230 }
231 }else{
232 *status = U_BUFFER_OVERFLOW_ERROR;
233 }
234 return (int32_t) (buffer - saveBuf);
235 }
236
237
238 /**
239 * Construct an array of shorts from a run-length encoded string.
240 */
241 int32_t
rleStringToUCharArray(uint16_t * src,int32_t srcLen,uint16_t * target,int32_t tgtLen,UErrorCode * status)242 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
243 int32_t length = 0;
244 int32_t ai = 0;
245 int i=2;
246
247 if(!status || U_FAILURE(*status)){
248 return 0;
249 }
250 /* the source is null terminated */
251 if(srcLen == -1){
252 srcLen = u_strlen(src);
253 }
254 if(srcLen <= 2){
255 return 2;
256 }
257 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
258
259 if(target == NULL){
260 return length;
261 }
262 if(tgtLen < length){
263 *status = U_BUFFER_OVERFLOW_ERROR;
264 return length;
265 }
266
267 for (; i<srcLen; ++i) {
268 uint16_t c = src[i];
269 if (c == ESCAPE) {
270 c = src[++i];
271 if (c == ESCAPE) {
272 target[ai++] = c;
273 } else {
274 int32_t runLength = (int32_t) c;
275 uint16_t runValue = src[++i];
276 int j=0;
277 for (; j<runLength; ++j) {
278 target[ai++] = runValue;
279 }
280 }
281 }
282 else {
283 target[ai++] = c;
284 }
285 }
286
287 if (ai != length){
288 *status = U_INTERNAL_PROGRAM_ERROR;
289 }
290
291 return length;
292 }
293
294 /**
295 * Construct an array of bytes from a run-length encoded string.
296 */
297 int32_t
rleStringToByteArray(uint16_t * src,int32_t srcLen,uint8_t * target,int32_t tgtLen,UErrorCode * status)298 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
299
300 int32_t length = 0;
301 UBool nextChar = true;
302 uint16_t c = 0;
303 int32_t node = 0;
304 int32_t runLength = 0;
305 int32_t i = 2;
306 int32_t ai=0;
307
308 if(!status || U_FAILURE(*status)){
309 return 0;
310 }
311 /* the source is null terminated */
312 if(srcLen == -1){
313 srcLen = u_strlen(src);
314 }
315 if(srcLen <= 2){
316 return 2;
317 }
318 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
319
320 if(target == NULL){
321 return length;
322 }
323 if(tgtLen < length){
324 *status = U_BUFFER_OVERFLOW_ERROR;
325 return length;
326 }
327
328 for (; ai<tgtLen; ) {
329 /* This part of the loop places the next byte into the local
330 * variable 'b' each time through the loop. It keeps the
331 * current character in 'c' and uses the boolean 'nextChar'
332 * to see if we've taken both bytes out of 'c' yet.
333 */
334 uint8_t b;
335 if (nextChar) {
336 c = src[i++];
337 b = (uint8_t) (c >> 8);
338 nextChar = false;
339 }
340 else {
341 b = (uint8_t) (c & 0xFF);
342 nextChar = true;
343 }
344
345 /* This part of the loop is a tiny state machine which handles
346 * the parsing of the run-length encoding. This would be simpler
347 * if we could look ahead, but we can't, so we use 'node' to
348 * move between three nodes in the state machine.
349 */
350 switch (node) {
351 case 0:
352 /* Normal idle node */
353 if (b == ESCAPE_BYTE) {
354 node = 1;
355 }
356 else {
357 target[ai++] = b;
358 }
359 break;
360 case 1:
361 /* We have seen one ESCAPE_BYTE; we expect either a second
362 * one, or a run length and value.
363 */
364 if (b == ESCAPE_BYTE) {
365 target[ai++] = ESCAPE_BYTE;
366 node = 0;
367 }
368 else {
369 runLength = b;
370 node = 2;
371 }
372 break;
373 case 2:
374 {
375 int j=0;
376 /* We have seen an ESCAPE_BYTE and length byte. We interpret
377 * the next byte as the value to be repeated.
378 */
379 for (; j<runLength; ++j){
380 if(ai<tgtLen){
381 target[ai++] = b;
382 }else{
383 *status = U_BUFFER_OVERFLOW_ERROR;
384 return ai;
385 }
386 }
387 node = 0;
388 break;
389 }
390 }
391 }
392
393 if (node != 0){
394 *status = U_INTERNAL_PROGRAM_ERROR;
395 /*("Bad run-length encoded byte array")*/
396 return 0;
397 }
398
399
400 if (i != srcLen){
401 /*("Excess data in RLE byte array string");*/
402 *status = U_INTERNAL_PROGRAM_ERROR;
403 return ai;
404 }
405
406 return ai;
407 }
408
409