1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2000-2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File writejava.c
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 01/11/02 Ram Creation.
17 *******************************************************************************
18 */
19 #include "rle.h"
20 /**
21 * The ESCAPE character is used during run-length encoding. It signals
22 * a run of identical chars.
23 */
24 static const uint16_t ESCAPE = 0xA5A5;
25
26 /**
27 * The ESCAPE_BYTE character is used during run-length encoding. It signals
28 * a run of identical bytes.
29 */
30 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
31
32 /**
33 * Append a byte to the given StringBuffer, packing two bytes into each
34 * character. The state parameter maintains intermediary data between
35 * calls.
36 * @param state A two-element array, with state[0] == 0 if this is the
37 * first byte of a pair, or state[0] != 0 if this is the second byte
38 * of a pair, in which case state[1] is the first byte.
39 */
40 static uint16_t*
appendEncodedByte(uint16_t * buffer,uint16_t * buffLimit,uint8_t value,uint8_t state[],UErrorCode * status)41 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
42 if(!status || U_FAILURE(*status)){
43 return NULL;
44 }
45 if (state[0] != 0) {
46 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
47 if(buffer < buffLimit){
48 *buffer++ = c;
49 }else{
50 *status = U_BUFFER_OVERFLOW_ERROR;
51 }
52 state[0] = 0;
53 return buffer;
54 }
55 else {
56 state[0] = 1;
57 state[1] = value;
58 return buffer;
59 }
60 }
61 /**
62 * Encode a run, possibly a degenerate run (of < 4 values).
63 * @param length The length of the run; must be > 0 && <= 0xFF.
64 */
65 static uint16_t*
encodeRunByte(uint16_t * buffer,uint16_t * bufLimit,uint8_t value,int32_t length,uint8_t state[],UErrorCode * status)66 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
67 if(!status || U_FAILURE(*status)){
68 return NULL;
69 }
70 if (length < 4) {
71 int32_t j=0;
72 for (; j<length; ++j) {
73 if (value == ESCAPE_BYTE) {
74 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
75 }
76 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
77 }
78 }
79 else {
80 if (length == ESCAPE_BYTE) {
81 if (value == ESCAPE_BYTE){
82 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
83 }
84 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
85 --length;
86 }
87 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
88 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
89 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
90 }
91 return buffer;
92 }
93
94 #define APPEND( buffer, bufLimit, value, num, status){ \
95 if(buffer<bufLimit){ \
96 *buffer++=(value); \
97 }else{ \
98 *status = U_BUFFER_OVERFLOW_ERROR; \
99 } \
100 num++; \
101 }
102
103 /**
104 * Encode a run, possibly a degenerate run (of < 4 values).
105 * @param length The length of the run; must be > 0 && <= 0xFFFF.
106 */
107 static uint16_t*
encodeRunShort(uint16_t * buffer,uint16_t * bufLimit,uint16_t value,int32_t length,UErrorCode * status)108 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
109 int32_t num=0;
110 if (length < 4) {
111 int j=0;
112 for (; j<length; ++j) {
113 if (value == (int32_t) ESCAPE){
114 APPEND(buffer,bufLimit,ESCAPE, num, status);
115
116 }
117 APPEND(buffer,bufLimit,value,num, status);
118 }
119 }
120 else {
121 if (length == (int32_t) ESCAPE) {
122 if (value == (int32_t) ESCAPE){
123 APPEND(buffer,bufLimit,ESCAPE,num,status);
124
125 }
126 APPEND(buffer,bufLimit,value,num,status);
127 --length;
128 }
129 APPEND(buffer,bufLimit,ESCAPE,num,status);
130 APPEND(buffer,bufLimit,(uint16_t) length, num,status);
131 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
132 }
133 return buffer;
134 }
135
136 /**
137 * Construct a string representing a char array. Use run-length encoding.
138 * A character represents itself, unless it is the ESCAPE character. Then
139 * the following notations are possible:
140 * ESCAPE ESCAPE ESCAPE literal
141 * ESCAPE n c n instances of character c
142 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
143 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
144 * If we encounter a run where n == ESCAPE, we represent this as:
145 * c ESCAPE n-1 c
146 * The ESCAPE value is chosen so as not to collide with commonly
147 * seen values.
148 */
149 int32_t
usArrayToRLEString(const uint16_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)150 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
151 uint16_t* bufLimit = buffer+bufLen;
152 uint16_t* saveBuffer = buffer;
153 if(buffer < bufLimit){
154 *buffer++ = (uint16_t)(srcLen>>16);
155 if(buffer<bufLimit){
156 uint16_t runValue = src[0];
157 int32_t runLength = 1;
158 int i=1;
159 *buffer++ = (uint16_t) srcLen;
160
161 for (; i<srcLen; ++i) {
162 uint16_t s = src[i];
163 if (s == runValue && runLength < 0xFFFF){
164 ++runLength;
165 }else {
166 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
167 runValue = s;
168 runLength = 1;
169 }
170 }
171 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
172 }else{
173 *status = U_BUFFER_OVERFLOW_ERROR;
174 }
175 }else{
176 *status = U_BUFFER_OVERFLOW_ERROR;
177 }
178 return (int32_t)(buffer - saveBuffer);
179 }
180
181 /**
182 * Construct a string representing a byte array. Use run-length encoding.
183 * Two bytes are packed into a single char, with a single extra zero byte at
184 * the end if needed. A byte represents itself, unless it is the
185 * ESCAPE_BYTE. Then the following notations are possible:
186 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
187 * ESCAPE_BYTE n b n instances of byte b
188 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
189 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
190 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
191 * b ESCAPE_BYTE n-1 b
192 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
193 * seen values.
194 */
195 int32_t
byteArrayToRLEString(const uint8_t * src,int32_t srcLen,uint16_t * buffer,int32_t bufLen,UErrorCode * status)196 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
197 const uint16_t* saveBuf = buffer;
198 uint16_t* bufLimit = buffer+bufLen;
199 if(buffer < bufLimit){
200 *buffer++ = ((uint16_t) (srcLen >> 16));
201
202 if(buffer<bufLimit){
203 uint8_t runValue = src[0];
204 int runLength = 1;
205 uint8_t state[2]= {0};
206 int i=1;
207 *buffer++=((uint16_t) srcLen);
208 for (; i<srcLen; ++i) {
209 uint8_t b = src[i];
210 if (b == runValue && runLength < 0xFF){
211 ++runLength;
212 }
213 else {
214 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
215 runValue = b;
216 runLength = 1;
217 }
218 }
219 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
220
221 /* We must save the final byte, if there is one, by padding
222 * an extra zero.
223 */
224 if (state[0] != 0) {
225 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
226 }
227 }else{
228 *status = U_BUFFER_OVERFLOW_ERROR;
229 }
230 }else{
231 *status = U_BUFFER_OVERFLOW_ERROR;
232 }
233 return (int32_t) (buffer - saveBuf);
234 }
235
236
237 /**
238 * Construct an array of shorts from a run-length encoded string.
239 */
240 int32_t
rleStringToUCharArray(uint16_t * src,int32_t srcLen,uint16_t * target,int32_t tgtLen,UErrorCode * status)241 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
242 int32_t length = 0;
243 int32_t ai = 0;
244 int i=2;
245
246 if(!status || U_FAILURE(*status)){
247 return 0;
248 }
249 /* the source is null terminated */
250 if(srcLen == -1){
251 srcLen = u_strlen(src);
252 }
253 if(srcLen <= 2){
254 return 2;
255 }
256 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
257
258 if(target == NULL){
259 return length;
260 }
261 if(tgtLen < length){
262 *status = U_BUFFER_OVERFLOW_ERROR;
263 return length;
264 }
265
266 for (; i<srcLen; ++i) {
267 uint16_t c = src[i];
268 if (c == ESCAPE) {
269 c = src[++i];
270 if (c == ESCAPE) {
271 target[ai++] = c;
272 } else {
273 int32_t runLength = (int32_t) c;
274 uint16_t runValue = src[++i];
275 int j=0;
276 for (; j<runLength; ++j) {
277 target[ai++] = runValue;
278 }
279 }
280 }
281 else {
282 target[ai++] = c;
283 }
284 }
285
286 if (ai != length){
287 *status = U_INTERNAL_PROGRAM_ERROR;
288 }
289
290 return length;
291 }
292
293 /**
294 * Construct an array of bytes from a run-length encoded string.
295 */
296 int32_t
rleStringToByteArray(uint16_t * src,int32_t srcLen,uint8_t * target,int32_t tgtLen,UErrorCode * status)297 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
298
299 int32_t length = 0;
300 UBool nextChar = TRUE;
301 uint16_t c = 0;
302 int32_t node = 0;
303 int32_t runLength = 0;
304 int32_t i = 2;
305 int32_t ai=0;
306
307 if(!status || U_FAILURE(*status)){
308 return 0;
309 }
310 /* the source is null terminated */
311 if(srcLen == -1){
312 srcLen = u_strlen(src);
313 }
314 if(srcLen <= 2){
315 return 2;
316 }
317 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
318
319 if(target == NULL){
320 return length;
321 }
322 if(tgtLen < length){
323 *status = U_BUFFER_OVERFLOW_ERROR;
324 return length;
325 }
326
327 for (; ai<tgtLen; ) {
328 /* This part of the loop places the next byte into the local
329 * variable 'b' each time through the loop. It keeps the
330 * current character in 'c' and uses the boolean 'nextChar'
331 * to see if we've taken both bytes out of 'c' yet.
332 */
333 uint8_t b;
334 if (nextChar) {
335 c = src[i++];
336 b = (uint8_t) (c >> 8);
337 nextChar = FALSE;
338 }
339 else {
340 b = (uint8_t) (c & 0xFF);
341 nextChar = TRUE;
342 }
343
344 /* This part of the loop is a tiny state machine which handles
345 * the parsing of the run-length encoding. This would be simpler
346 * if we could look ahead, but we can't, so we use 'node' to
347 * move between three nodes in the state machine.
348 */
349 switch (node) {
350 case 0:
351 /* Normal idle node */
352 if (b == ESCAPE_BYTE) {
353 node = 1;
354 }
355 else {
356 target[ai++] = b;
357 }
358 break;
359 case 1:
360 /* We have seen one ESCAPE_BYTE; we expect either a second
361 * one, or a run length and value.
362 */
363 if (b == ESCAPE_BYTE) {
364 target[ai++] = ESCAPE_BYTE;
365 node = 0;
366 }
367 else {
368 runLength = b;
369 node = 2;
370 }
371 break;
372 case 2:
373 {
374 int j=0;
375 /* We have seen an ESCAPE_BYTE and length byte. We interpret
376 * the next byte as the value to be repeated.
377 */
378 for (; j<runLength; ++j){
379 if(ai<tgtLen){
380 target[ai++] = b;
381 }else{
382 *status = U_BUFFER_OVERFLOW_ERROR;
383 return ai;
384 }
385 }
386 node = 0;
387 break;
388 }
389 }
390 }
391
392 if (node != 0){
393 *status = U_INTERNAL_PROGRAM_ERROR;
394 /*("Bad run-length encoded byte array")*/
395 return 0;
396 }
397
398
399 if (i != srcLen){
400 /*("Excess data in RLE byte array string");*/
401 *status = U_INTERNAL_PROGRAM_ERROR;
402 return ai;
403 }
404
405 return ai;
406 }
407
408