1 /* Copyright (C) 2007-2008 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 ** GNU General Public License for more details.
11 */
12 #include "gsm.h"
13 #include <stdlib.h>
14 #include <string.h>
15
16 /** UTILITIES
17 **/
18 byte_t
gsm_int_to_bcdi(int value)19 gsm_int_to_bcdi( int value )
20 {
21 return (byte_t)((value / 10) | ((value % 10) << 4));
22 }
23
24 int
gsm_int_from_bcdi(byte_t val)25 gsm_int_from_bcdi( byte_t val )
26 {
27 int ret = 0;
28
29 if ((val & 0xf0) <= 0x90)
30 ret = (val >> 4);
31
32 if ((val & 0x0f) <= 0x90)
33 ret |= (val % 0xf)*10;
34
35 return ret;
36 }
37
#if 0
/* Disabled (compiled out by the surrounding #if 0).
 *
 * Decodes 'bcdlen' 4-bit values from 'bcd', taking the low nibble of each
 * byte before the high nibble, and maps values 0..13 to the characters
 * "0123456789*#,N". A 0xf nibble in the very last position stops decoding;
 * any other value >= 14 is skipped without producing output.
 *
 * 'dst' may be NULL, in which case nothing is stored.
 * Returns the number of characters produced.
 */
static int
gsm_bcdi_to_ascii( cbytes_t  bcd, int  bcdlen, bytes_t  dst )
{
    /* exactly 14 characters, no terminating NUL is stored */
    static byte_t  bcdichars[14] = "0123456789*#,N";

    int  result = 0;
    int  shift  = 0;   /* 0 = low nibble, 4 = high nibble of bcd[0] */

    while (bcdlen > 0) {
        int  c = (bcd[0] >> shift) & 0xf;

        /* trailing filler nibble */
        if (c == 0xf && bcdlen == 1)
            break;

        if (c < 14) {
            if (dst) dst[result] = bcdichars[c];
            result += 1;
        }
        bcdlen --;
        shift += 4;
        if (shift == 8) {
            /* both nibbles consumed, move to the next byte */
            bcd++;
            shift = 0;
        }
    }
    return result;
}
#endif
67
#if 0
/* Disabled (compiled out by the surrounding #if 0).
 *
 * Packs the characters of 'ascii' two per byte into 'dst'. Digits map to
 * 0..9, '*' to 11, '#' to 12, ',' to 13 and 'N' to 14; any other character
 * stops the conversion. The first character of each pair ends up in the
 * high nibble. A leftover single character is stored with 0xf0 or'ed in.
 *
 * 'dst' may be NULL, in which case nothing is stored.
 * Returns the number of bytes produced.
 *
 * NOTE(review): the symbol codes used here are one higher than the index
 * of the same symbols in gsm_bcdi_to_ascii()'s table - confirm before
 * re-enabling this code.
 */
static int
gsm_bcdi_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
{
    cbytes_t  end    = ascii + asciilen;
    int       result = 0;
    int       phase  = 0x01;   /* sentinel bit: reaches 0x100 after two nibbles */

    while (ascii < end) {
        int  c = *ascii++;

        if (c == '*')
            c = 11;
        else if (c == '#')
            c = 12;
        else if (c == ',')
            c = 13;
        else if (c == 'N')
            c = 14;
        else {
            c -= '0';
            /* also rejects characters below '0' thanks to the unsigned compare */
            if ((unsigned)c >= 10)
                break;
        }
        phase = (phase << 4) | c;
        if (phase & 0x100) {
            /* two nibbles collected, emit the byte */
            if (dst) dst[result] = (byte_t) phase;
            result += 1;
            phase   = 0x01;
        }
    }
    if (phase != 0x01) {
        /* odd number of characters: flush the pending nibble */
        if (dst) dst[result] = (byte_t)( phase | 0xf0 );
        result += 1;
    }
    return  result;
}
#endif
106
/* Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
 * to its value 0..15. Returns -1 for any other character. */
int
gsm_hexchar_to_int( char  c )
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;

    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;

    return -1;
}
118
/* Same as gsm_hexchar_to_int(), except that an invalid character
 * yields 0 instead of -1. */
int
gsm_hexchar_to_int0( char  c )
{
    int  digit = gsm_hexchar_to_int(c);

    if (digit < 0)
        digit = 0;

    return digit;
}
126
/* Parse the two hex digits hex[0] and hex[1] into a value 0..255.
 * Returns -1 if either character is not a hex digit. */
int
gsm_hex2_to_byte( const char*  hex )
{
    int  high = gsm_hexchar_to_int(hex[0]);
    int  low  = gsm_hexchar_to_int(hex[1]);

    return (high >= 0 && low >= 0) ? ((high << 4) | low) : -1;
}
138
/* Parse the four hex digits hex[0..3] into a value 0..65535, first two
 * digits being the most significant byte.
 * Returns -1 if any character is not a hex digit. */
int
gsm_hex4_to_short( const char*  hex )
{
    int  msb = gsm_hex2_to_byte(hex);
    int  lsb = gsm_hex2_to_byte(hex + 2);

    return (msb >= 0 && lsb >= 0) ? ((msb << 8) | lsb) : -1;
}
150
151 int
gsm_hex2_to_byte0(const char * hex)152 gsm_hex2_to_byte0( const char* hex )
153 {
154 int hi = gsm_hexchar_to_int0(hex[0]);
155 int lo = gsm_hexchar_to_int0(hex[1]);
156
157 return (byte_t)( (hi << 4) | lo );
158 }
159
/* Write the low 8 bits of 'val' as two lowercase hex digits into hex[0]
 * and hex[1]. No terminating NUL is written. */
void
gsm_hex_from_byte( char*  hex, int  val )
{
    static const char  digits[] = "0123456789abcdef";
    int                high     = (val >> 4) & 15;
    int                low      = val & 15;

    hex[0] = digits[high];
    hex[1] = digits[low];
}
168
/* Write the low 16 bits of 'val' as four lowercase hex digits into
 * hex[0..3], most significant byte first. */
void
gsm_hex_from_short( char*  hex, int  val )
{
    int  msb = val >> 8;

    gsm_hex_from_byte( hex, msb );
    gsm_hex_from_byte( hex + 2, val );
}
175
176
177
178 /** HEX
179 **/
180 void
gsm_hex_to_bytes0(cbytes_t hex,int hexlen,bytes_t dst)181 gsm_hex_to_bytes0( cbytes_t hex, int hexlen, bytes_t dst )
182 {
183 int nn;
184
185 for (nn = 0; nn < hexlen/2; nn++ ) {
186 dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn );
187 }
188 if (hexlen & 1) {
189 dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4;
190 }
191 }
192
193 int
gsm_hex_to_bytes(cbytes_t hex,int hexlen,bytes_t dst)194 gsm_hex_to_bytes( cbytes_t hex, int hexlen, bytes_t dst )
195 {
196 int nn;
197
198 if (hexlen & 1) /* must be even */
199 return -1;
200
201 for (nn = 0; nn < hexlen/2; nn++ ) {
202 int c = gsm_hex2_to_byte( (const char*)hex+2*nn );
203 if (c < 0) return -1;
204 dst[nn] = (byte_t) c;
205 }
206 return hexlen/2;
207 }
208
209 void
gsm_hex_from_bytes(char * hex,cbytes_t src,int srclen)210 gsm_hex_from_bytes( char* hex, cbytes_t src, int srclen )
211 {
212 int nn;
213
214 for (nn = 0; nn < srclen; nn++) {
215 gsm_hex_from_byte( hex + 2*nn, src[nn] );
216 }
217 }
218
219 /** ROPES
220 **/
221
222 void
gsm_rope_init(GsmRope rope)223 gsm_rope_init( GsmRope rope )
224 {
225 rope->data = NULL;
226 rope->pos = 0;
227 rope->max = 0;
228 rope->error = 0;
229 }
230
231 void
gsm_rope_init_alloc(GsmRope rope,int count)232 gsm_rope_init_alloc( GsmRope rope, int count )
233 {
234 rope->data = rope->data0;
235 rope->pos = 0;
236 rope->max = sizeof(rope->data0);
237 rope->error = 0;
238
239 if (count > 0) {
240 rope->data = calloc( count, 1 );
241 rope->max = count;
242
243 if (rope->data == NULL) {
244 rope->error = 1;
245 rope->max = 0;
246 }
247 }
248 }
249
250 int
gsm_rope_done(GsmRope rope)251 gsm_rope_done( GsmRope rope )
252 {
253 int result = rope->error;
254
255 if (rope->data && rope->data != rope->data0)
256 free(rope->data);
257
258 rope->data = NULL;
259 rope->pos = 0;
260 rope->max = 0;
261 rope->error = 0;
262
263 return result;
264 }
265
266
267 bytes_t
gsm_rope_done_acquire(GsmRope rope,int * psize)268 gsm_rope_done_acquire( GsmRope rope, int *psize )
269 {
270 bytes_t result = rope->data;
271
272 *psize = rope->pos;
273 if (result == rope->data0) {
274 result = malloc( rope->pos );
275 if (result != NULL)
276 memcpy( result, rope->data, rope->pos );
277 }
278 return result;
279 }
280
281
282 int
gsm_rope_ensure(GsmRope rope,int new_count)283 gsm_rope_ensure( GsmRope rope, int new_count )
284 {
285 if (rope->data != NULL) {
286 int old_max = rope->max;
287 bytes_t old_data = rope->data == rope->data0 ? NULL : rope->data;
288 int new_max = old_max;
289 bytes_t new_data;
290
291 while (new_max < new_count) {
292 new_max += (new_max >> 1) + 4;
293 }
294 new_data = realloc( old_data, new_max );
295 if (new_data == NULL) {
296 rope->error = 1;
297 return -1;
298 }
299 rope->data = new_data;
300 rope->max = new_max;
301 } else {
302 rope->max = new_count;
303 }
304 return 0;
305 }
306
307 static int
gsm_rope_can_grow(GsmRope rope,int count)308 gsm_rope_can_grow( GsmRope rope, int count )
309 {
310 if (!rope->data || rope->error)
311 return 0;
312
313 if (rope->pos + count > rope->max)
314 {
315 if (rope->data == NULL)
316 rope->max = rope->pos + count;
317
318 else if (rope->error ||
319 gsm_rope_ensure( rope, rope->pos + count ) < 0)
320 return 0;
321 }
322 return 1;
323 }
324
325 void
gsm_rope_add_c(GsmRope rope,char c)326 gsm_rope_add_c( GsmRope rope, char c )
327 {
328 if (gsm_rope_can_grow(rope, 1)) {
329 rope->data[ rope->pos ] = (byte_t) c;
330 }
331 rope->pos += 1;
332 }
333
334 void
gsm_rope_add(GsmRope rope,const void * buf,int buflen)335 gsm_rope_add( GsmRope rope, const void* buf, int buflen )
336 {
337 if (gsm_rope_can_grow(rope, buflen)) {
338 memcpy( rope->data + rope->pos, (const char*)buf, buflen );
339 }
340 rope->pos += buflen;
341 }
342
343 void*
gsm_rope_reserve(GsmRope rope,int count)344 gsm_rope_reserve( GsmRope rope, int count )
345 {
346 void* result = NULL;
347
348 if (gsm_rope_can_grow(rope, count))
349 {
350 if (rope->data != NULL)
351 result = rope->data + rope->pos;
352 }
353 rope->pos += count;
354
355 return result;
356 }
357
358 /* skip a given number of Unicode characters in a utf-8 byte string */
359 cbytes_t
utf8_skip(cbytes_t utf8,cbytes_t utf8end,int count)360 utf8_skip( cbytes_t utf8,
361 cbytes_t utf8end,
362 int count)
363 {
364 cbytes_t p = utf8;
365 cbytes_t end = utf8end;
366
367 for ( ; count > 0; count-- ) {
368 int c;
369
370 if (p >= end)
371 break;
372
373 c = *p++;
374 if (c > 128) {
375 while (p < end && (p[0] & 0xc0) == 0x80)
376 p++;
377 }
378 }
379 return p;
380 }
381
382
383 static __inline__ int
utf8_next(cbytes_t * pp,cbytes_t end)384 utf8_next( cbytes_t *pp, cbytes_t end )
385 {
386 cbytes_t p = *pp;
387 int result = -1;
388
389 if (p < end) {
390 int c= *p++;
391 if (c >= 128) {
392 if ((c & 0xe0) == 0xc0)
393 c &= 0x1f;
394 else if ((c & 0xf0) == 0xe0)
395 c &= 0x0f;
396 else
397 c &= 0x07;
398
399 while (p < end && (p[0] & 0xc0) == 0x80) {
400 c = (c << 6) | (p[0] & 0x3f);
401 p ++;
402 }
403 }
404 result = c;
405 *pp = p;
406 }
407 return result;
408 }
409
410
411 __inline__ int
utf8_write(bytes_t utf8,int offset,int v)412 utf8_write( bytes_t utf8, int offset, int v )
413 {
414 int result;
415
416 if (v < 128) {
417 result = 1;
418 if (utf8)
419 utf8[offset] = (byte_t) v;
420 } else if (v < 0x800) {
421 result = 2;
422 if (utf8) {
423 utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) );
424 utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) );
425 }
426 } else if (v < 0x10000) {
427 result = 3;
428 if (utf8) {
429 utf8[offset+0] = (byte_t)( 0xe0 | (v >> 12) );
430 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
431 utf8[offset+2] = (byte_t)( 0x80 | (v & 0x3f) );
432 }
433 } else {
434 result = 4;
435 if (utf8) {
436 utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) );
437 utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) );
438 utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
439 utf8[offset+3] = (byte_t)( 0x80 | (v & 0x3f) );
440 }
441 }
442 return result;
443 }
444
445 static __inline__ int
ucs2_write(bytes_t ucs2,int offset,int v)446 ucs2_write( bytes_t ucs2, int offset, int v )
447 {
448 if (ucs2) {
449 ucs2[offset+0] = (byte_t) (v >> 8);
450 ucs2[offset+1] = (byte_t) (v);
451 }
452 return 2;
453 }
454
455 int
utf8_check(cbytes_t p,int utf8len)456 utf8_check( cbytes_t p, int utf8len )
457 {
458 cbytes_t end = p + utf8len;
459 int result = 0;
460
461 if (p) {
462 while (p < end) {
463 int c = *p++;
464 if (c >= 128) {
465 int len;
466 if ((c & 0xe0) == 0xc0) {
467 len = 1;
468 }
469 else if ((c & 0xf0) == 0xe0) {
470 len = 2;
471 }
472 else if ((c & 0xf8) == 0xf0) {
473 len = 3;
474 }
475 else
476 goto Exit; /* malformed utf-8 */
477
478 if (p+len > end) /* string too short */
479 goto Exit;
480
481 for ( ; len > 0; len--, p++ ) {
482 if ((p[0] & 0xc0) != 0x80)
483 goto Exit;
484 }
485 }
486 }
487 result = 1;
488 }
489 Exit:
490 return result;
491 }
492
493 /** UCS2 to UTF8
494 **/
495
496 /* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */
497 int
ucs2_to_utf8(cbytes_t ucs2,int ucs2len,bytes_t buf)498 ucs2_to_utf8( cbytes_t ucs2,
499 int ucs2len,
500 bytes_t buf )
501 {
502 int nn;
503 int result = 0;
504
505 for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) {
506 int c= (ucs2[0] << 8) | ucs2[1];
507 result += utf8_write(buf, result, c);
508 }
509 return result;
510 }
511
512 /* count the number of UCS2 chars contained in a utf8 byte string */
513 int
utf8_to_ucs2(cbytes_t utf8,int utf8len,bytes_t ucs2)514 utf8_to_ucs2( cbytes_t utf8,
515 int utf8len,
516 bytes_t ucs2 )
517 {
518 cbytes_t p = utf8;
519 cbytes_t end = p + utf8len;
520 int result = 0;
521
522 while (p < end) {
523 int c = utf8_next(&p, end);
524
525 if (c < 0)
526 break;
527
528 result += ucs2_write(ucs2, result, c);
529 }
530 return result/2;
531 }
532
533
534
535 /** GSM ALPHABET
536 **/
537
/* septet value announcing that the next septet indexes the extension table */
#define  GSM_7BITS_ESCAPE   0x1b
/* NOTE(review): not referenced in the visible part of this file */
#define  GSM_7BITS_UNKNOWN  0
540
/* GSM 7-bit default alphabet: Unicode code point for each septet value.
 * Slot 0x1b is the escape code (GSM_7BITS_ESCAPE), not a character, and
 * holds 0. Slot 0x5d is LATIN CAPITAL LETTER N WITH TILDE (U+00D1), the
 * upper-case counterpart of U+00F1 found at slot 0x7d. */
static const unsigned short   gsm7bits_to_unicode[128] = {
  '@', 0xa3,  '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
0x394,  '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e,    0, 0xc6, 0xe6, 0xdf, 0xc9,
  ' ',  '!',  '"',  '#', 0xa4,  '%',  '&', '\'',  '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 0xa1,  'A',  'B',  'C',  'D',  'E',  'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
  'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',  'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
 0xbf,  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0,
};
551
/* GSM 7-bit extension table: Unicode code point for the septet that
 * follows an escape code. Slots that are not listed hold 0. */
static const unsigned short  gsm7bits_extend_to_unicode[128] = {
    [0x0a] = '\f',
    [0x14] = '^',
    [0x28] = '{',
    [0x29] = '}',
    [0x2f] = '\\',
    [0x3c] = '[',
    [0x3d] = '~',
    [0x3e] = ']',
    [0x40] = '|',
    [0x65] = 0x20ac,
};
562
563
564 static int
unichar_to_gsm7(int unicode)565 unichar_to_gsm7( int unicode )
566 {
567 int nn;
568 for (nn = 0; nn < 128; nn++) {
569 if (gsm7bits_to_unicode[nn] == unicode) {
570 return nn;
571 }
572 }
573 return -1;
574 }
575
576 static int
unichar_to_gsm7_extend(int unichar)577 unichar_to_gsm7_extend( int unichar )
578 {
579 int nn;
580 for (nn = 0; nn < 128; nn++) {
581 if (gsm7bits_extend_to_unicode[nn] == unichar) {
582 return nn;
583 }
584 }
585 return -1;
586 }
587
588
/* Number of septets needed to encode 'unicode': 1 when it is in the
 * default alphabet, 2 (escape + code) when it is only in the extension
 * table, 0 when it cannot be encoded. */
static int
unichar_to_gsm7_count( int  unicode )
{
    if (unichar_to_gsm7(unicode) >= 0)
        return 1;

    if (unichar_to_gsm7_extend(unicode) >= 0)
        return 2;

    return 0;
}
605
606
607 cbytes_t
utf8_skip_gsm7(cbytes_t utf8,cbytes_t utf8end,int gsm7len)608 utf8_skip_gsm7( cbytes_t utf8, cbytes_t utf8end, int gsm7len )
609 {
610 cbytes_t p = utf8;
611 cbytes_t end = utf8end;
612
613 while (gsm7len >0) {
614 cbytes_t q = p;
615 int c = utf8_next( &q, end );
616 int len;
617
618 if (c < 0)
619 break;
620
621 len = unichar_to_gsm7_count( c );
622 if (len == 0) /* unknown chars are replaced by spaces */
623 len = 1;
624
625 if (len > gsm7len)
626 break;
627
628 gsm7len -= len;
629 p = q;
630 }
631 return p;
632 }
633
634
635 int
utf8_check_gsm7(cbytes_t utf8,int utf8len)636 utf8_check_gsm7( cbytes_t utf8,
637 int utf8len )
638 {
639 cbytes_t utf8end = utf8 + utf8len;
640
641 while (utf8 < utf8end) {
642 int c = utf8_next( &utf8, utf8end );
643 if (unichar_to_gsm7_count(c) == 0)
644 return 0;
645 }
646 return 1;
647 }
648
649
650 int
utf8_from_gsm7(cbytes_t src,int septet_offset,int septet_count,bytes_t utf8)651 utf8_from_gsm7( cbytes_t src,
652 int septet_offset,
653 int septet_count,
654 bytes_t utf8 )
655 {
656 int shift = (septet_offset & 7);
657 int escaped = 0;
658 int result = 0;
659
660 src += (septet_offset >> 3);
661 for ( ; septet_count > 0; septet_count-- )
662 {
663 int c = (src[0] >> shift) & 0x7f;
664 int v;
665
666 if (shift > 1) {
667 c = ((src[1] << (8-shift)) | c) & 0x7f;
668 }
669
670 if (escaped) {
671 v = gsm7bits_extend_to_unicode[c];
672 } else if (c == GSM_7BITS_ESCAPE) {
673 escaped = 1;
674 goto NextSeptet;
675 } else {
676 v = gsm7bits_to_unicode[c];
677 }
678
679 result += utf8_write( utf8, result, v );
680
681 NextSeptet:
682 shift += 7;
683 if (shift >= 8) {
684 shift -= 8;
685 src += 1;
686 }
687 }
688 return result;
689 }
690
691
692 int
utf8_from_gsm8(cbytes_t src,int count,bytes_t utf8)693 utf8_from_gsm8( cbytes_t src, int count, bytes_t utf8 )
694 {
695 int result = 0;
696 int escaped = 0;
697
698
699 for ( ; count > 0; count-- )
700 {
701 int c = *src++;
702
703 if (c == 0xff)
704 break;
705
706 if (c == GSM_7BITS_ESCAPE) {
707 if (escaped) { /* two escape characters => one space */
708 c = 0x20;
709 escaped = 0;
710 } else {
711 escaped = 1;
712 continue;
713 }
714 }
715 else
716 {
717 if (c >= 0x80) {
718 c = 0x20;
719 escaped = 0;
720 } else if (escaped) {
721 c = gsm7bits_extend_to_unicode[c];
722 } else
723 c = gsm7bits_to_unicode[c];
724 }
725
726 result += utf8_write( utf8, result, c );
727 }
728 return result;
729 }
730
731 /* convert a GSM 7-bit message into a unicode character array
732 * the 'dst' array must contain at least 160 chars. the function
733 * returns the number of characters decoded
734 *
735 * assumes the 'dst' array has at least septet_count items, returns the
736 * number of unichars really written
737 */
738 int
ucs2_from_gsm7(bytes_t ucs2,cbytes_t src,int septet_offset,int septet_count)739 ucs2_from_gsm7( bytes_t ucs2,
740 cbytes_t src,
741 int septet_offset,
742 int septet_count )
743 {
744 const unsigned char* p = src + (septet_offset >> 3);
745 int shift = (septet_offset & 7);
746 int escaped = 0;
747 int result = 0;
748
749 for ( ; septet_count > 0; septet_count-- )
750 {
751 unsigned val = (p[0] >> shift) & 0x7f;
752
753 if (shift > 1)
754 val = (val | (p[1] << (8-shift))) & 0x7f;
755
756 if (escaped) {
757 int c = gsm7bits_to_unicode[val];
758
759 result += ucs2_write(ucs2, result, c);
760 escaped = 0;
761 }
762 else if (val == GSM_7BITS_ESCAPE) {
763 escaped = 1;
764 }
765 else {
766 val = gsm7bits_extend_to_unicode[val];
767 if (val == 0)
768 val = 0x20;
769
770 result += ucs2_write( ucs2, result, val );
771 }
772 }
773 return result/2;
774 }
775
776
777 /* count the number of septets required to write a utf8 string */
778 static int
utf8_to_gsm7_count(cbytes_t utf8,int utf8len)779 utf8_to_gsm7_count( cbytes_t utf8, int utf8len )
780 {
781 cbytes_t utf8end = utf8 + utf8len;
782 int result = 0;
783
784 while ( utf8 < utf8end ) {
785 int len;
786 int c = utf8_next( &utf8, utf8end );
787
788 if (c < 0)
789 break;
790
791 len = unichar_to_gsm7_count(c);
792 if (len == 0) /* replace non-representables with space */
793 len = 1;
794
795 result += len;
796 }
797 return result;
798 }
799
/* State of the bit writer used to pack 7-bit septets into bytes
 * (see bwriter_init, bwriter_add7 and bwriter_done). */
typedef struct {
    bytes_t   dst;     /* byte that receives the next completed 8 bits */
    unsigned  pad;     /* bits accumulated so far and not yet stored    */
    int       bits;    /* number of valid bits currently held in 'pad'  */
    int       offset;  /* start position plus 7 for every septet added */
} BWriterRec, *BWriter;
806
807 static void
bwriter_init(BWriter writer,bytes_t dst,int start)808 bwriter_init( BWriter writer, bytes_t dst, int start )
809 {
810 int shift = start & 7;
811
812 writer->dst = dst + (start >> 3);
813 writer->pad = 0;
814 writer->bits = shift;
815 writer->offset = start;
816
817 if (shift > 0) {
818 writer->pad = writer->dst[0] & ~(0xFF << shift);
819 }
820 }
821
822 static void
bwriter_add7(BWriter writer,unsigned value)823 bwriter_add7( BWriter writer, unsigned value )
824 {
825 writer->pad |= (unsigned)(value << writer->bits);
826 writer->bits += 7;
827 if (writer->bits >= 8) {
828 writer->dst[0] = (byte_t)writer->pad;
829 writer->bits -= 8;
830 writer->pad >>= 8;
831 writer->dst += 1;
832 }
833 writer->offset += 7;
834 }
835
836 static int
bwriter_done(BWriter writer)837 bwriter_done( BWriter writer )
838 {
839 if (writer->bits > 0) {
840 writer->dst[0] = (byte_t)writer->pad;
841 writer->pad = 0;
842 writer->bits = 0;
843 writer->dst += 1;
844 }
845 return writer->offset;
846 }
847
848 /* convert a utf8 string to a gsm7 byte string - return the number of septets written */
849 int
utf8_to_gsm7(cbytes_t utf8,int utf8len,bytes_t dst,int offset)850 utf8_to_gsm7( cbytes_t utf8, int utf8len, bytes_t dst, int offset )
851 {
852 const unsigned char* utf8end = utf8 + utf8len;
853 BWriterRec writer[1];
854
855 if (dst == NULL)
856 return utf8_to_gsm7_count(utf8, utf8len);
857
858 bwriter_init( writer, dst, offset );
859 while ( utf8 < utf8end ) {
860 int c = utf8_next( &utf8, utf8end );
861 int nn;
862
863 if (c < 0)
864 break;
865
866 nn = unichar_to_gsm7(c);
867 if (nn >= 0) {
868 bwriter_add7( writer, nn );
869 continue;
870 }
871
872 nn = unichar_to_gsm7_extend(c);
873 if (nn >= 0) {
874 bwriter_add7( writer, GSM_7BITS_ESCAPE );
875 bwriter_add7( writer, nn );
876 continue;
877 }
878
879 /* unknown => replaced by space */
880 bwriter_add7( writer, 0x20 );
881 }
882 return bwriter_done( writer );
883 }
884
885
886 int
utf8_to_gsm8(cbytes_t utf8,int utf8len,bytes_t dst)887 utf8_to_gsm8( cbytes_t utf8, int utf8len, bytes_t dst )
888 {
889 const unsigned char* utf8end = utf8 + utf8len;
890 int result = 0;
891
892 while ( utf8 < utf8end ) {
893 int c = utf8_next( &utf8, utf8end );
894 int nn;
895
896 if (c < 0)
897 break;
898
899 nn = unichar_to_gsm7(c);
900 if (nn >= 0) {
901 if (dst)
902 dst[result] = (byte_t)nn;
903 result += 1;
904 continue;
905 }
906
907 nn = unichar_to_gsm7_extend(c);
908 if (nn >= 0) {
909 if (dst) {
910 dst[result+0] = (byte_t) GSM_7BITS_ESCAPE;
911 dst[result+1] = (byte_t) nn;
912 }
913 result += 2;
914 continue;
915 }
916
917 /* unknown => space */
918 if (dst)
919 dst[result] = 0x20;
920 result += 1;
921 }
922 return result;
923 }
924
925
926 int
ucs2_to_gsm7(cbytes_t ucs2,int ucs2len,bytes_t dst,int offset)927 ucs2_to_gsm7( cbytes_t ucs2, int ucs2len, bytes_t dst, int offset )
928 {
929 const unsigned char* ucs2end = ucs2 + ucs2len*2;
930 BWriterRec writer[1];
931
932 bwriter_init( writer, dst, offset );
933 while ( ucs2 < ucs2end ) {
934 int c = *ucs2++;
935 int nn;
936
937 for (nn = 0; nn < 128; nn++) {
938 if ( gsm7bits_to_unicode[nn] == c ) {
939 bwriter_add7( writer, nn );
940 goto NextUnicode;
941 }
942 }
943 for (nn = 0; nn < 128; nn++) {
944 if ( gsm7bits_extend_to_unicode[nn] == c ) {
945 bwriter_add7( writer, GSM_7BITS_ESCAPE );
946 bwriter_add7( writer, nn );
947 goto NextUnicode;
948 }
949 }
950
951 /* unknown */
952 bwriter_add7( writer, 0x20 );
953
954 NextUnicode:
955 ;
956 }
957 return bwriter_done( writer );
958 }
959
960
961 int
ucs2_to_gsm8(cbytes_t ucs2,int ucs2len,bytes_t dst)962 ucs2_to_gsm8( cbytes_t ucs2, int ucs2len, bytes_t dst )
963 {
964 const unsigned char* ucs2end = ucs2 + ucs2len*2;
965 bytes_t dst0 = dst;
966
967 while ( ucs2 < ucs2end ) {
968 int c = *ucs2++;
969 int nn;
970
971 for (nn = 0; nn < 128; nn++) {
972 if ( gsm7bits_to_unicode[nn] == c ) {
973 *dst++ = (byte_t)nn;
974 goto NextUnicode;
975 }
976 }
977 for (nn = 0; nn < 128; nn++) {
978 if ( gsm7bits_extend_to_unicode[nn] == c ) {
979 dst[0] = (byte_t) GSM_7BITS_ESCAPE;
980 dst[1] = (byte_t) nn;
981 dst += 2;
982 goto NextUnicode;
983 }
984 }
985
986 /* unknown */
987 *dst++ = 0x20;
988
989 NextUnicode:
990 ;
991 }
992 return (dst - dst0);
993 }
994
995 int
gsm_bcdnum_to_ascii(cbytes_t bcd,int count,bytes_t dst)996 gsm_bcdnum_to_ascii( cbytes_t bcd, int count, bytes_t dst )
997 {
998 int result = 0;
999 int shift = 0;
1000
1001 while (count > 0) {
1002 int c = (bcd[0] >> shift) & 0xf;
1003
1004 if (c == 15 && count == 1) /* ignore trailing 0xf */
1005 break;
1006
1007 if (c >= 14)
1008 c = 0;
1009
1010 if (dst) dst[result] = "0123456789*#,N"[c];
1011 result += 1;
1012
1013 shift += 4;
1014 if (shift == 8) {
1015 shift = 0;
1016 bcd += 1;
1017 }
1018 }
1019 return result;
1020 }
1021
1022
1023 int
gsm_bcdnum_from_ascii(cbytes_t ascii,int asciilen,bytes_t dst)1024 gsm_bcdnum_from_ascii( cbytes_t ascii, int asciilen, bytes_t dst )
1025 {
1026 cbytes_t end = ascii + asciilen;
1027 int result = 0;
1028 int phase = 0x01;
1029
1030 while (ascii < end) {
1031 int c = *ascii++;
1032
1033 if (c == '*')
1034 c = 10;
1035 else if (c == '#')
1036 c = 11;
1037 else if (c == ',')
1038 c = 12;
1039 else if (c == 'N')
1040 c = 13;
1041 else {
1042 c -= '0';
1043 if ((unsigned)c >= 10U)
1044 return -1;
1045 }
1046 phase = (phase << 4) | c;
1047 result += 1;
1048 if (phase & 0x100) {
1049 if (dst) dst[result/2] = (byte_t) phase;
1050 phase = 0x01;
1051 }
1052 }
1053
1054 if (result & 1) {
1055 if (dst) dst[result/2] = (byte_t)(phase | 0xf0);
1056 }
1057 return result;
1058 }
1059
1060 /** ADN: Abbreviated Dialing Number
1061 **/
1062
/* size in bytes of the fixed footer located at the end of an ADN record */
#define  ADN_FOOTER_SIZE     14
/* byte offsets of the fields inside that footer */
#define  ADN_OFFSET_NUMBER_LENGTH    0
#define  ADN_OFFSET_TON_NPI          1
#define  ADN_OFFSET_NUMBER_START     2
/* NOTE(review): the three offsets below are not referenced in the visible
 * part of this file */
#define  ADN_OFFSET_NUMBER_END       11
#define  ADN_OFFSET_CAPABILITY_ID    12
#define  ADN_OFFSET_EXTENSION_ID     13
1070
1071 /* see 10.5.1 of 3GPP 51.011 */
1072 static int
sim_adn_alpha_to_utf8(cbytes_t alpha,cbytes_t end,bytes_t dst)1073 sim_adn_alpha_to_utf8( cbytes_t alpha, cbytes_t end, bytes_t dst )
1074 {
1075 int result = 0;
1076
1077 /* ignore trailing 0xff */
1078 while (alpha < end && end[-1] == 0xff)
1079 end--;
1080
1081 if (alpha >= end)
1082 return 0;
1083
1084 if (alpha[0] == 0x80) { /* UCS/2 source encoding */
1085 alpha += 1;
1086 result = ucs2_to_utf8( alpha, (end-alpha)/2, dst );
1087 }
1088 else
1089 {
1090 int is_ucs2 = 0;
1091 int len = 0, base = 0;
1092
1093 if (alpha+3 <= end && alpha[0] == 0x81) {
1094 is_ucs2 = 1;
1095 len = alpha[1];
1096 base = alpha[2] << 7;
1097 alpha += 3;
1098 if (len > end-alpha)
1099 len = end-alpha;
1100 } else if (alpha+4 <= end && alpha[0] == 0x82) {
1101 is_ucs2 = 1;
1102 len = alpha[1];
1103 base = (alpha[2] << 8) | alpha[3];
1104 alpha += 4;
1105 if (len > end-alpha)
1106 len = end-alpha;
1107 }
1108
1109 if (is_ucs2) {
1110 end = alpha + len;
1111 while (alpha < end) {
1112 int c = alpha[0];
1113 if (c >= 0x80) {
1114 result += utf8_write(dst, result, base + (c & 0x7f));
1115 alpha += 1;
1116 } else {
1117 /* GSM character set */
1118 int count;
1119 for (count = 0; alpha+count < end && alpha[count] < 128; count++)
1120 ;
1121 result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL));
1122 alpha += count;
1123 }
1124 }
1125 }
1126 else {
1127 result = utf8_from_gsm8(alpha, end-alpha, dst);
1128 }
1129 }
1130 return result;
1131 }
1132
#if 0
/* Disabled (compiled out by the surrounding #if 0).
 *
 * Encodes a UTF-8 string as an ADN alpha identifier: one GSM character
 * per byte when utf8_check_gsm7() accepts the string, otherwise a 0x80
 * marker byte followed by UCS-2 units.
 *
 * 'dst' may be NULL, in which case nothing is stored.
 * Returns the number of bytes of the encoding.
 */
static int
sim_adn_alpha_from_utf8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
{
    int   result = 0;

    if (utf8_check_gsm7(utf8, utf8len)) {
        /* GSM 7-bit compatible, encode directly as 8-bit string */
        result = utf8_to_gsm8(utf8, utf8len, dst);
    } else {
        /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */
        if (dst) {
            dst[0] = 0x80;
        }
        /* marker byte + 2 bytes per UCS-2 unit */
        result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2;
    }
    return  result;
}
#endif
1152
1153 int
sim_adn_record_from_bytes(SimAdnRecord rec,cbytes_t data,int len)1154 sim_adn_record_from_bytes( SimAdnRecord rec, cbytes_t data, int len )
1155 {
1156 cbytes_t end = data + len;
1157 cbytes_t footer = end - ADN_FOOTER_SIZE;
1158 int num_len;
1159
1160 rec->adn.alpha[0] = 0;
1161 rec->adn.number[0] = 0;
1162 rec->ext_record = 0xff;
1163
1164 if (len < ADN_FOOTER_SIZE)
1165 return -1;
1166
1167 /* alpha is optional */
1168 if (len > ADN_FOOTER_SIZE) {
1169 cbytes_t dataend = data + len - ADN_FOOTER_SIZE;
1170 int count = sim_adn_alpha_to_utf8(data, dataend, NULL);
1171
1172 if (count > sizeof(rec->adn.alpha)-1) /* too long */
1173 return -1;
1174
1175 sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha);
1176 rec->adn.alpha[count] = 0;
1177 }
1178
1179 num_len = footer[ADN_OFFSET_NUMBER_LENGTH];
1180 if (num_len > 11)
1181 return -1;
1182
1183 /* decode TON and number to ASCII, NOTE: this is lossy !! */
1184 {
1185 int ton = footer[ADN_OFFSET_TON_NPI];
1186 bytes_t number = (bytes_t) rec->adn.number;
1187 int len = sizeof(rec->adn.number)-1;
1188 int count;
1189
1190 if (ton != 0x81 && ton != 0x91)
1191 return -1;
1192
1193 if (ton == 0x91) {
1194 *number++ = '+';
1195 len -= 1;
1196 }
1197
1198 count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START,
1199 num_len*2, number );
1200 number[count] = 0;
1201 }
1202 return 0;
1203 }
1204
1205 int
sim_adn_record_to_bytes(SimAdnRecord rec,bytes_t data,int datalen)1206 sim_adn_record_to_bytes( SimAdnRecord rec, bytes_t data, int datalen )
1207 {
1208 bytes_t end = data + datalen;
1209 bytes_t footer = end - ADN_FOOTER_SIZE;
1210 int ton = 0x81;
1211 cbytes_t number = (cbytes_t) rec->adn.number;
1212
1213 if (number[0] == '+') {
1214 ton = 0x91;
1215 number += 1;
1216 }
1217 footer[0] = (strlen((const char*)number)+1)/2 + 1;
1218 /* XXXX: TODO */
1219 return 0;
1220 }
1221