• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (C) 2007-2008 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 ** GNU General Public License for more details.
11 */
12 #include "gsm.h"
13 #include <stdlib.h>
14 #include <string.h>
15 
16 /** UTILITIES
17  **/
18 byte_t
gsm_int_to_bcdi(int value)19 gsm_int_to_bcdi( int  value )
20 {
21     return (byte_t)((value / 10) | ((value % 10) << 4));
22 }
23 
24 int
gsm_int_from_bcdi(byte_t val)25 gsm_int_from_bcdi( byte_t  val )
26 {
27     int  ret = 0;
28 
29     if ((val & 0xf0) <= 0x90)
30         ret = (val >> 4);
31 
32     if ((val & 0x0f) <= 0x90)
33         ret |= (val % 0xf)*10;
34 
35     return ret;
36 }
37 
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Convert 'bcdlen' BCD nibbles starting at 'bcd' into ASCII characters,
 * reading the low nibble of each byte before the high one.
 *
 * When 'dst' is NULL nothing is stored and only the character count is
 * computed. Returns the number of characters produced.
 */
static int
gsm_bcdi_to_ascii( cbytes_t  bcd, int  bcdlen, bytes_t  dst )
{
    /* nibble value -> ASCII character, for values 0..13 */
    static byte_t  bcdichars[14] = "0123456789*#,N";

    int  result = 0;
    int  shift  = 0;   /* 0 for the low nibble, 4 for the high nibble */

    while (bcdlen > 0) {
        int  c = (bcd[0] >> shift) & 0xf;

        /* a 0xf in the very last nibble is padding: stop there */
        if (c == 0xf && bcdlen == 1)
            break;

        /* nibbles 14 and 15 have no character and are silently skipped */
        if (c < 14) {
            if (dst) dst[result] = bcdichars[c];
            result += 1;
        }
        bcdlen --;
        shift += 4;
        if (shift == 8) {
            /* both nibbles consumed, move on to the next byte */
            bcd++;
            shift = 0;
        }
    }
    return result;
}
#endif
67 
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Pack 'asciilen' ASCII characters into BCD bytes, two characters per
 * byte. Conversion stops at the first character that is neither a
 * decimal digit nor one of '*', '#', ',' and 'N'.
 *
 * When 'dst' is NULL nothing is stored and only the byte count is
 * computed. Returns the number of bytes produced.
 *
 * NOTE(review): the special characters map to 11..14 here, while the
 * "0123456789*#,N" table in gsm_bcdi_to_ascii() puts them at 10..13 -
 * the two disabled functions do not look like exact inverses; confirm
 * before re-enabling.
 */
static int
gsm_bcdi_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
{
    cbytes_t  end    = ascii + asciilen;
    int       result = 0;
    /* accumulator with a sentinel bit: 0x01 means "empty"; once two
     * nibbles have been shifted in, the sentinel reaches bit 8 (0x100) */
    int       phase  = 0x01;

    while (ascii < end) {
        int  c = *ascii++;

        if (c == '*')
            c = 11;
        else if (c == '#')
            c = 12;
        else if (c == ',')
            c = 13;
        else if (c == 'N')
            c = 14;
        else {
            c -= '0';
            /* not a decimal digit: stop converting */
            if ((unsigned)c >= 10)
                break;
        }
        phase = (phase << 4) | c;
        if (phase & 0x100) {
            /* two nibbles collected; the first character ends up in the
             * high nibble of the stored byte */
            if (dst) dst[result] = (byte_t) phase;
            result += 1;
            phase   = 0x01;
        }
    }
    if (phase != 0x01) {
        /* odd number of characters: emit the leftover nibble with the
         * high nibble forced to 0xf */
        if (dst) dst[result] = (byte_t)( phase | 0xf0 );
        result += 1;
    }
    return  result;
}
#endif
106 
/* Return the value (0..15) of one hexadecimal digit, accepting both
 * lower-case and upper-case letters, or -1 if 'c' is not a hex digit.
 */
int
gsm_hexchar_to_int( char  c )
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return (c - 'a') + 10;

    if (c >= 'A' && c <= 'F')
        return (c - 'A') + 10;

    return -1;
}
118 
/* Lenient variant of gsm_hexchar_to_int(): a character that is not a
 * hexadecimal digit yields 0 instead of -1.
 */
int
gsm_hexchar_to_int0( char  c )
{
    int  digit = gsm_hexchar_to_int(c);

    if (digit < 0)
        digit = 0;

    return digit;
}
126 
/* Parse the two hexadecimal digits at hex[0] and hex[1] into a value
 * in 0..255. Returns -1 if either character is not a hex digit.
 */
int
gsm_hex2_to_byte( const char*  hex )
{
    int  high = gsm_hexchar_to_int(hex[0]);
    int  low  = gsm_hexchar_to_int(hex[1]);

    return (high >= 0 && low >= 0) ? ((high << 4) | low) : -1;
}
138 
/* Parse the four hexadecimal digits at hex[0..3] into a 16-bit value,
 * most significant byte first. Returns -1 if any digit is invalid.
 */
int
gsm_hex4_to_short( const char*  hex )
{
    int  high_byte = gsm_hex2_to_byte(hex);
    int  low_byte  = gsm_hex2_to_byte(hex + 2);

    return (high_byte >= 0 && low_byte >= 0) ? ((high_byte << 8) | low_byte)
                                             : -1;
}
150 
151 int
gsm_hex2_to_byte0(const char * hex)152 gsm_hex2_to_byte0( const char*  hex )
153 {
154     int  hi = gsm_hexchar_to_int0(hex[0]);
155     int  lo = gsm_hexchar_to_int0(hex[1]);
156 
157     return (byte_t)( (hi << 4) | lo );
158 }
159 
/* Write the low 8 bits of 'val' as two lower-case hexadecimal digits
 * into hex[0] and hex[1]. No terminating NUL is written.
 */
void
gsm_hex_from_byte( char*  hex, int val )
{
    static const char  digits[] = "0123456789abcdef";

    const int  high = (val >> 4) & 0x0f;
    const int  low  = val & 0x0f;

    hex[0] = digits[high];
    hex[1] = digits[low];
}
168 
/* Write the low 16 bits of 'val' as four lower-case hexadecimal digits
 * into hex[0..3], most significant byte first. No NUL is appended.
 */
void
gsm_hex_from_short( char*  hex, int  val )
{
    int  high_byte = val >> 8;

    gsm_hex_from_byte( hex,     high_byte );
    gsm_hex_from_byte( hex + 2, val );
}
175 
176 
177 
178 /** HEX
179  **/
180 void
gsm_hex_to_bytes0(cbytes_t hex,int hexlen,bytes_t dst)181 gsm_hex_to_bytes0( cbytes_t  hex, int  hexlen, bytes_t  dst )
182 {
183     int  nn;
184 
185     for (nn = 0; nn < hexlen/2; nn++ ) {
186         dst[nn] = (byte_t) gsm_hex2_to_byte0( (const char*)hex+2*nn );
187     }
188     if (hexlen & 1) {
189         dst[nn] = gsm_hexchar_to_int0( hex[2*nn] ) << 4;
190     }
191 }
192 
193 int
gsm_hex_to_bytes(cbytes_t hex,int hexlen,bytes_t dst)194 gsm_hex_to_bytes( cbytes_t  hex, int  hexlen, bytes_t  dst )
195 {
196     int  nn;
197 
198     if (hexlen & 1)  /* must be even */
199         return -1;
200 
201     for (nn = 0; nn < hexlen/2; nn++ ) {
202         int  c = gsm_hex2_to_byte( (const char*)hex+2*nn );
203         if (c < 0) return -1;
204         dst[nn] = (byte_t) c;
205     }
206     return hexlen/2;
207 }
208 
209 void
gsm_hex_from_bytes(char * hex,cbytes_t src,int srclen)210 gsm_hex_from_bytes( char*  hex, cbytes_t  src, int  srclen )
211 {
212     int  nn;
213 
214     for (nn = 0; nn < srclen; nn++) {
215         gsm_hex_from_byte( hex + 2*nn, src[nn] );
216     }
217 }
218 
219 /** ROPES
220  **/
221 
222 void
gsm_rope_init(GsmRope rope)223 gsm_rope_init( GsmRope  rope )
224 {
225     rope->data  = NULL;
226     rope->pos   = 0;
227     rope->max   = 0;
228     rope->error = 0;
229 }
230 
231 void
gsm_rope_init_alloc(GsmRope rope,int count)232 gsm_rope_init_alloc( GsmRope  rope, int  count )
233 {
234     rope->data  = rope->data0;
235     rope->pos   = 0;
236     rope->max   = sizeof(rope->data0);
237     rope->error = 0;
238 
239     if (count > 0) {
240         rope->data = calloc( count, 1 );
241         rope->max  = count;
242 
243         if (rope->data == NULL) {
244             rope->error = 1;
245             rope->max   = 0;
246         }
247     }
248 }
249 
250 int
gsm_rope_done(GsmRope rope)251 gsm_rope_done( GsmRope  rope )
252 {
253     int  result = rope->error;
254 
255     if (rope->data && rope->data != rope->data0)
256         free(rope->data);
257 
258     rope->data  = NULL;
259     rope->pos   = 0;
260     rope->max   = 0;
261     rope->error = 0;
262 
263     return result;
264 }
265 
266 
267 bytes_t
gsm_rope_done_acquire(GsmRope rope,int * psize)268 gsm_rope_done_acquire( GsmRope  rope, int  *psize )
269 {
270     bytes_t  result = rope->data;
271 
272     *psize = rope->pos;
273     if (result == rope->data0) {
274         result = malloc(  rope->pos );
275         if (result != NULL)
276             memcpy( result, rope->data, rope->pos );
277     }
278     return result;
279 }
280 
281 
282 int
gsm_rope_ensure(GsmRope rope,int new_count)283 gsm_rope_ensure( GsmRope  rope, int  new_count )
284 {
285     if (rope->data != NULL) {
286         int       old_max  = rope->max;
287         bytes_t   old_data = rope->data == rope->data0 ? NULL : rope->data;
288         int       new_max  = old_max;
289         bytes_t   new_data;
290 
291         while (new_max < new_count) {
292             new_max += (new_max >> 1) + 4;
293         }
294         new_data = realloc( old_data, new_max );
295         if (new_data == NULL) {
296             rope->error = 1;
297             return -1;
298         }
299         rope->data = new_data;
300         rope->max  = new_max;
301     } else {
302         rope->max = new_count;
303     }
304     return 0;
305 }
306 
307 static int
gsm_rope_can_grow(GsmRope rope,int count)308 gsm_rope_can_grow( GsmRope  rope, int  count )
309 {
310     if (!rope->data || rope->error)
311         return 0;
312 
313     if (rope->pos + count > rope->max)
314     {
315         if (rope->data == NULL)
316             rope->max = rope->pos + count;
317 
318         else if (rope->error ||
319                  gsm_rope_ensure( rope, rope->pos + count ) < 0)
320             return 0;
321     }
322     return 1;
323 }
324 
325 void
gsm_rope_add_c(GsmRope rope,char c)326 gsm_rope_add_c( GsmRope  rope,  char  c )
327 {
328     if (gsm_rope_can_grow(rope, 1)) {
329         rope->data[ rope->pos ] = (byte_t) c;
330     }
331     rope->pos += 1;
332 }
333 
334 void
gsm_rope_add(GsmRope rope,const void * buf,int buflen)335 gsm_rope_add( GsmRope  rope, const void*  buf, int  buflen )
336 {
337     if (gsm_rope_can_grow(rope, buflen)) {
338         memcpy( rope->data + rope->pos, (const char*)buf, buflen );
339     }
340     rope->pos += buflen;
341 }
342 
343 void*
gsm_rope_reserve(GsmRope rope,int count)344 gsm_rope_reserve( GsmRope  rope, int  count )
345 {
346     void*  result = NULL;
347 
348     if (gsm_rope_can_grow(rope, count))
349     {
350         if (rope->data != NULL)
351             result = rope->data + rope->pos;
352     }
353     rope->pos += count;
354 
355     return result;
356 }
357 
358 /* skip a given number of Unicode characters in a utf-8 byte string */
359 cbytes_t
utf8_skip(cbytes_t utf8,cbytes_t utf8end,int count)360 utf8_skip( cbytes_t   utf8,
361            cbytes_t   utf8end,
362            int        count)
363 {
364     cbytes_t  p   = utf8;
365     cbytes_t  end = utf8end;
366 
367     for ( ; count > 0; count-- ) {
368         int  c;
369 
370         if (p >= end)
371             break;
372 
373         c = *p++;
374         if (c > 128) {
375             while (p < end && (p[0] & 0xc0) == 0x80)
376                 p++;
377         }
378     }
379     return  p;
380 }
381 
382 
383 static __inline__ int
utf8_next(cbytes_t * pp,cbytes_t end)384 utf8_next( cbytes_t  *pp, cbytes_t  end )
385 {
386     cbytes_t  p      = *pp;
387     int       result = -1;
388 
389     if (p < end) {
390         int  c= *p++;
391         if (c >= 128) {
392             if ((c & 0xe0) == 0xc0)
393                 c &= 0x1f;
394             else if ((c & 0xf0) == 0xe0)
395                 c &= 0x0f;
396             else
397                 c &= 0x07;
398 
399             while (p < end && (p[0] & 0xc0) == 0x80) {
400                 c = (c << 6) | (p[0] & 0x3f);
401                 p ++;
402             }
403         }
404         result = c;
405         *pp    = p;
406     }
407     return result;
408 }
409 
410 
411 __inline__ int
utf8_write(bytes_t utf8,int offset,int v)412 utf8_write( bytes_t  utf8, int  offset, int  v )
413 {
414     int  result;
415 
416     if (v < 128) {
417         result = 1;
418         if (utf8)
419             utf8[offset] = (byte_t) v;
420     } else if (v < 0x800) {
421         result = 2;
422         if (utf8) {
423             utf8[offset+0] = (byte_t)( 0xc0 | (v >> 6) );
424             utf8[offset+1] = (byte_t)( 0x80 | (v & 0x3f) );
425         }
426     } else if (v < 0x10000) {
427         result = 3;
428         if (utf8) {
429             utf8[offset+0] = (byte_t)( 0xe0 |  (v >> 12) );
430             utf8[offset+1] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
431             utf8[offset+2] = (byte_t)( 0x80 |  (v & 0x3f) );
432         }
433     } else {
434         result = 4;
435         if (utf8) {
436             utf8[offset+0] = (byte_t)( 0xf0 | ((v >> 18) & 0x7) );
437             utf8[offset+1] = (byte_t)( 0x80 | ((v >> 12) & 0x3f) );
438             utf8[offset+2] = (byte_t)( 0x80 | ((v >> 6) & 0x3f) );
439             utf8[offset+3] = (byte_t)( 0x80 |  (v & 0x3f) );
440         }
441     }
442     return  result;
443 }
444 
445 static __inline__ int
ucs2_write(bytes_t ucs2,int offset,int v)446 ucs2_write( bytes_t  ucs2, int  offset, int  v )
447 {
448     if (ucs2) {
449         ucs2[offset+0] = (byte_t) (v >> 8);
450         ucs2[offset+1] = (byte_t) (v);
451     }
452     return 2;
453 }
454 
455 int
utf8_check(cbytes_t p,int utf8len)456 utf8_check( cbytes_t   p, int  utf8len )
457 {
458     cbytes_t  end    = p + utf8len;
459     int       result = 0;
460 
461     if (p) {
462         while (p < end) {
463             int  c = *p++;
464             if (c >= 128) {
465                 int  len;
466                 if ((c & 0xe0) == 0xc0) {
467                     len = 1;
468                 }
469                 else if ((c & 0xf0) == 0xe0) {
470                     len = 2;
471                 }
472                 else if ((c & 0xf8) == 0xf0) {
473                     len = 3;
474                 }
475                 else
476                     goto Exit;  /* malformed utf-8 */
477 
478                 if (p+len > end) /* string too short */
479                     goto Exit;
480 
481                 for ( ; len > 0; len--, p++ ) {
482                     if ((p[0] & 0xc0) != 0x80)
483                         goto Exit;
484                 }
485             }
486         }
487         result = 1;
488     }
489 Exit:
490     return result;
491 }
492 
493 /** UCS2 to UTF8
494  **/
495 
496 /* convert a UCS2 string into a UTF8 byte string, assumes 'buf' is correctly sized */
497 int
ucs2_to_utf8(cbytes_t ucs2,int ucs2len,bytes_t buf)498 ucs2_to_utf8( cbytes_t  ucs2,
499               int       ucs2len,
500               bytes_t   buf )
501 {
502     int  nn;
503     int  result = 0;
504 
505     for (nn = 0; nn < ucs2len; ucs2 += 2, nn++) {
506         int  c= (ucs2[0] << 8) | ucs2[1];
507         result += utf8_write(buf, result, c);
508     }
509     return result;
510 }
511 
512 /* count the number of UCS2 chars contained in a utf8 byte string */
513 int
utf8_to_ucs2(cbytes_t utf8,int utf8len,bytes_t ucs2)514 utf8_to_ucs2( cbytes_t  utf8,
515               int       utf8len,
516               bytes_t   ucs2 )
517 {
518     cbytes_t  p      = utf8;
519     cbytes_t  end    = p + utf8len;
520     int       result = 0;
521 
522     while (p < end) {
523         int  c = utf8_next(&p, end);
524 
525         if (c < 0)
526             break;
527 
528         result += ucs2_write(ucs2, result, c);
529     }
530     return result/2;
531 }
532 
533 
534 
535 /** GSM ALPHABET
536  **/
537 
/* septet value announcing that the following septet must be looked up
 * in the extension table instead of the default one */
#define  GSM_7BITS_ESCAPE   0x1b
/* NOTE(review): not referenced by the code visible in this file;
 * presumably a marker for characters without a mapping - confirm */
#define  GSM_7BITS_UNKNOWN  0
540 
/* GSM 7-bit default alphabet: maps each septet value (0..127) to its
 * Unicode code point. Entry 0x1b is the escape code (GSM_7BITS_ESCAPE),
 * which is not a character by itself, hence the 0.
 *
 * Entry 0x5d is capital N with tilde (U+00D1), the upper-case partner of
 * the 0xf1 found at 0x7d.
 */
static const unsigned short   gsm7bits_to_unicode[128] = {
  '@', 0xa3,  '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, 0xf2, 0xc7, '\n', 0xd8, 0xf8, '\r', 0xc5, 0xe5,
0x394,  '_',0x3a6,0x393,0x39b,0x3a9,0x3a0,0x3a8,0x3a3,0x398,0x39e,    0, 0xc6, 0xe6, 0xdf, 0xc9,
  ' ',  '!',  '"',  '#', 0xa4,  '%',  '&', '\'',  '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',  '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 0xa1,  'A',  'B',  'C',  'D',  'E',  'F',  'G',  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
  'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',  'X',  'Y',  'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7,
 0xbf,  'a',  'b',  'c',  'd',  'e',  'f',  'g',  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',  'x',  'y',  'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0,
};
551 
/* GSM 7-bit extension table: maps the septet that follows an escape
 * (GSM_7BITS_ESCAPE) to its Unicode code point. Every entry not listed
 * below is 0, meaning the code has no character assigned.
 */
static const unsigned short  gsm7bits_extend_to_unicode[128] = {
    [0x0a] = '\f',
    [0x14] = '^',
    [0x28] = '{',
    [0x29] = '}',
    [0x2f] = '\\',
    [0x3c] = '[',
    [0x3d] = '~',
    [0x3e] = ']',
    [0x40] = '|',
    [0x65] = 0x20ac,
};
562 
563 
564 static int
unichar_to_gsm7(int unicode)565 unichar_to_gsm7( int  unicode )
566 {
567     int  nn;
568     for (nn = 0; nn < 128; nn++) {
569         if (gsm7bits_to_unicode[nn] == unicode) {
570             return nn;
571         }
572     }
573     return -1;
574 }
575 
576 static int
unichar_to_gsm7_extend(int unichar)577 unichar_to_gsm7_extend( int  unichar )
578 {
579     int  nn;
580     for (nn = 0; nn < 128; nn++) {
581         if (gsm7bits_extend_to_unicode[nn] == unichar) {
582             return nn;
583         }
584     }
585     return -1;
586 }
587 
588 
/* Return how many septets are needed to encode one Unicode character:
 * 1 if it belongs to the default alphabet, 2 (escape + code) if it
 * belongs to the extension table, 0 if it cannot be represented.
 */
static int
unichar_to_gsm7_count( int  unicode )
{
    if (unichar_to_gsm7(unicode) >= 0)
        return 1;

    if (unichar_to_gsm7_extend(unicode) >= 0)
        return 2;

    return 0;
}
605 
606 
607 cbytes_t
utf8_skip_gsm7(cbytes_t utf8,cbytes_t utf8end,int gsm7len)608 utf8_skip_gsm7( cbytes_t  utf8, cbytes_t  utf8end, int  gsm7len )
609 {
610     cbytes_t  p   = utf8;
611     cbytes_t  end = utf8end;
612 
613     while (gsm7len >0) {
614         cbytes_t  q = p;
615         int       c = utf8_next( &q, end );
616         int       len;
617 
618         if (c < 0)
619             break;
620 
621         len = unichar_to_gsm7_count( c );
622         if (len == 0)  /* unknown chars are replaced by spaces */
623             len = 1;
624 
625         if (len > gsm7len)
626             break;
627 
628         gsm7len -= len;
629         p        = q;
630     }
631     return  p;
632 }
633 
634 
635 int
utf8_check_gsm7(cbytes_t utf8,int utf8len)636 utf8_check_gsm7( cbytes_t  utf8,
637                  int       utf8len )
638 {
639     cbytes_t  utf8end = utf8 + utf8len;
640 
641     while (utf8 < utf8end) {
642         int  c = utf8_next( &utf8, utf8end );
643         if (unichar_to_gsm7_count(c) == 0)
644             return 0;
645     }
646     return 1;
647 }
648 
649 
650 int
utf8_from_gsm7(cbytes_t src,int septet_offset,int septet_count,bytes_t utf8)651 utf8_from_gsm7( cbytes_t  src,
652                 int       septet_offset,
653                 int       septet_count,
654                 bytes_t   utf8 )
655 {
656     int  shift   = (septet_offset & 7);
657     int  escaped = 0;
658     int  result  = 0;
659 
660     src += (septet_offset >> 3);
661     for ( ; septet_count > 0; septet_count-- )
662     {
663         int  c = (src[0] >> shift) & 0x7f;
664         int  v;
665 
666         if (shift > 1) {
667             c = ((src[1] << (8-shift)) | c) & 0x7f;
668         }
669 
670         if (escaped) {
671             v = gsm7bits_extend_to_unicode[c];
672         } else if (c == GSM_7BITS_ESCAPE) {
673             escaped = 1;
674             goto NextSeptet;
675         } else {
676             v = gsm7bits_to_unicode[c];
677         }
678 
679         result += utf8_write( utf8, result, v );
680 
681     NextSeptet:
682         shift += 7;
683         if (shift >= 8) {
684             shift -= 8;
685             src   += 1;
686         }
687     }
688     return  result;
689 }
690 
691 
692 int
utf8_from_gsm8(cbytes_t src,int count,bytes_t utf8)693 utf8_from_gsm8( cbytes_t  src, int  count, bytes_t  utf8 )
694 {
695     int  result  = 0;
696     int  escaped = 0;
697 
698 
699     for ( ; count > 0; count-- )
700     {
701         int  c = *src++;
702 
703         if (c == 0xff)
704             break;
705 
706         if (c == GSM_7BITS_ESCAPE) {
707             if (escaped) { /* two escape characters => one space */
708                 c = 0x20;
709                 escaped = 0;
710             } else {
711                 escaped = 1;
712                 continue;
713             }
714         }
715         else
716         {
717             if (c >= 0x80) {
718                 c       = 0x20;
719                 escaped = 0;
720             } else if (escaped) {
721                 c = gsm7bits_extend_to_unicode[c];
722             } else
723                 c = gsm7bits_to_unicode[c];
724         }
725 
726         result += utf8_write( utf8, result, c );
727     }
728     return  result;
729 }
730 
731 /* convert a GSM 7-bit message into a unicode character array
732  * the 'dst' array must contain at least 160 chars. the function
733  * returns the number of characters decoded
734  *
735  * assumes the 'dst' array has at least septet_count items, returns the
736  * number of unichars really written
737  */
738 int
ucs2_from_gsm7(bytes_t ucs2,cbytes_t src,int septet_offset,int septet_count)739 ucs2_from_gsm7( bytes_t   ucs2,
740                 cbytes_t  src,
741                 int       septet_offset,
742                 int       septet_count )
743 {
744     const unsigned char*  p     = src + (septet_offset >> 3);
745     int                   shift = (septet_offset & 7);
746     int                   escaped = 0;
747     int                   result  = 0;
748 
749     for ( ; septet_count > 0; septet_count-- )
750     {
751         unsigned  val  = (p[0] >> shift) & 0x7f;
752 
753         if (shift > 1)
754             val = (val | (p[1] << (8-shift))) & 0x7f;
755 
756         if (escaped) {
757             int  c = gsm7bits_to_unicode[val];
758 
759             result += ucs2_write(ucs2, result, c);
760             escaped = 0;
761         }
762         else if (val == GSM_7BITS_ESCAPE) {
763             escaped = 1;
764         }
765         else {
766             val = gsm7bits_extend_to_unicode[val];
767             if (val == 0)
768                 val = 0x20;
769 
770             result += ucs2_write( ucs2, result, val );
771         }
772     }
773     return result/2;
774 }
775 
776 
777 /* count the number of septets required to write a utf8 string */
778 static int
utf8_to_gsm7_count(cbytes_t utf8,int utf8len)779 utf8_to_gsm7_count( cbytes_t  utf8, int  utf8len )
780 {
781     cbytes_t  utf8end = utf8 + utf8len;
782     int       result  = 0;
783 
784     while ( utf8 < utf8end ) {
785         int  len;
786         int  c = utf8_next( &utf8, utf8end );
787 
788         if (c < 0)
789             break;
790 
791         len = unichar_to_gsm7_count(c);
792         if (len == 0)    /* replace non-representables with space */
793             len = 1;
794 
795         result += len;
796     }
797     return result;
798 }
799 
/* state of a small bit writer used to pack 7-bit values into bytes */
typedef struct {
    bytes_t   dst;     /* next output byte to be written */
    unsigned  pad;     /* bits accumulated but not stored yet, lowest bit first */
    int       bits;    /* number of pending bits currently held in 'pad' */
    int       offset;  /* running position: the start value plus 7 for each value added */
} BWriterRec, *BWriter;
806 
807 static void
bwriter_init(BWriter writer,bytes_t dst,int start)808 bwriter_init( BWriter  writer, bytes_t  dst, int  start )
809 {
810     int  shift = start & 7;
811 
812     writer->dst    = dst + (start >> 3);
813     writer->pad    = 0;
814     writer->bits   = shift;
815     writer->offset = start;
816 
817     if (shift > 0) {
818         writer->pad  = writer->dst[0] & ~(0xFF << shift);
819     }
820 }
821 
822 static void
bwriter_add7(BWriter writer,unsigned value)823 bwriter_add7( BWriter  writer, unsigned  value )
824 {
825     writer->pad  |= (unsigned)(value << writer->bits);
826     writer->bits += 7;
827     if (writer->bits >= 8) {
828         writer->dst[0] = (byte_t)writer->pad;
829         writer->bits  -= 8;
830         writer->pad  >>= 8;
831         writer->dst   += 1;
832     }
833     writer->offset += 7;
834 }
835 
836 static int
bwriter_done(BWriter writer)837 bwriter_done( BWriter  writer )
838 {
839     if (writer->bits > 0) {
840         writer->dst[0] = (byte_t)writer->pad;
841         writer->pad    = 0;
842         writer->bits   = 0;
843         writer->dst   += 1;
844     }
845     return writer->offset;
846 }
847 
848 /* convert a utf8 string to a gsm7 byte string - return the number of septets written */
849 int
utf8_to_gsm7(cbytes_t utf8,int utf8len,bytes_t dst,int offset)850 utf8_to_gsm7( cbytes_t  utf8, int  utf8len, bytes_t  dst, int offset )
851 {
852     const unsigned char*  utf8end = utf8 + utf8len;
853     BWriterRec            writer[1];
854 
855     if (dst == NULL)
856         return utf8_to_gsm7_count(utf8, utf8len);
857 
858     bwriter_init( writer, dst, offset );
859     while ( utf8 < utf8end ) {
860         int  c = utf8_next( &utf8, utf8end );
861         int  nn;
862 
863         if (c < 0)
864             break;
865 
866         nn = unichar_to_gsm7(c);
867         if (nn >= 0) {
868             bwriter_add7( writer, nn );
869             continue;
870         }
871 
872         nn = unichar_to_gsm7_extend(c);
873         if (nn >= 0) {
874             bwriter_add7( writer, GSM_7BITS_ESCAPE );
875             bwriter_add7( writer, nn );
876             continue;
877         }
878 
879         /* unknown => replaced by space */
880         bwriter_add7( writer, 0x20 );
881     }
882     return  bwriter_done( writer );
883 }
884 
885 
886 int
utf8_to_gsm8(cbytes_t utf8,int utf8len,bytes_t dst)887 utf8_to_gsm8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
888 {
889     const unsigned char*  utf8end = utf8 + utf8len;
890     int                   result  = 0;
891 
892     while ( utf8 < utf8end ) {
893         int  c = utf8_next( &utf8, utf8end );
894         int  nn;
895 
896         if (c < 0)
897             break;
898 
899         nn = unichar_to_gsm7(c);
900         if (nn >= 0) {
901             if (dst)
902                 dst[result] = (byte_t)nn;
903             result += 1;
904             continue;
905         }
906 
907         nn = unichar_to_gsm7_extend(c);
908         if (nn >= 0) {
909             if (dst) {
910                 dst[result+0] = (byte_t) GSM_7BITS_ESCAPE;
911                 dst[result+1] = (byte_t) nn;
912             }
913             result += 2;
914             continue;
915         }
916 
917         /* unknown => space */
918         if (dst)
919             dst[result] = 0x20;
920         result += 1;
921     }
922     return  result;
923 }
924 
925 
926 int
ucs2_to_gsm7(cbytes_t ucs2,int ucs2len,bytes_t dst,int offset)927 ucs2_to_gsm7( cbytes_t  ucs2, int  ucs2len, bytes_t  dst, int offset )
928 {
929     const unsigned char*  ucs2end = ucs2 + ucs2len*2;
930     BWriterRec            writer[1];
931 
932     bwriter_init( writer, dst, offset );
933     while ( ucs2 < ucs2end ) {
934         int  c = *ucs2++;
935         int  nn;
936 
937         for (nn = 0; nn < 128; nn++) {
938             if ( gsm7bits_to_unicode[nn] == c ) {
939                 bwriter_add7( writer, nn );
940                 goto NextUnicode;
941             }
942         }
943         for (nn = 0; nn < 128; nn++) {
944             if ( gsm7bits_extend_to_unicode[nn] == c ) {
945                 bwriter_add7( writer, GSM_7BITS_ESCAPE );
946                 bwriter_add7( writer, nn );
947                 goto NextUnicode;
948             }
949         }
950 
951         /* unknown */
952         bwriter_add7( writer, 0x20 );
953 
954     NextUnicode:
955         ;
956     }
957     return  bwriter_done( writer );
958 }
959 
960 
961 int
ucs2_to_gsm8(cbytes_t ucs2,int ucs2len,bytes_t dst)962 ucs2_to_gsm8( cbytes_t  ucs2, int  ucs2len, bytes_t  dst )
963 {
964     const unsigned char*  ucs2end = ucs2 + ucs2len*2;
965     bytes_t               dst0    = dst;
966 
967     while ( ucs2 < ucs2end ) {
968         int  c = *ucs2++;
969         int  nn;
970 
971         for (nn = 0; nn < 128; nn++) {
972             if ( gsm7bits_to_unicode[nn] == c ) {
973                 *dst++ = (byte_t)nn;
974                 goto NextUnicode;
975             }
976         }
977         for (nn = 0; nn < 128; nn++) {
978             if ( gsm7bits_extend_to_unicode[nn] == c ) {
979                 dst[0] = (byte_t) GSM_7BITS_ESCAPE;
980                 dst[1] = (byte_t) nn;
981                 dst   += 2;
982                 goto NextUnicode;
983             }
984         }
985 
986         /* unknown */
987         *dst++ = 0x20;
988 
989     NextUnicode:
990         ;
991     }
992     return (dst - dst0);
993 }
994 
995 int
gsm_bcdnum_to_ascii(cbytes_t bcd,int count,bytes_t dst)996 gsm_bcdnum_to_ascii( cbytes_t  bcd, int  count, bytes_t  dst )
997 {
998     int  result = 0;
999     int  shift  = 0;
1000 
1001     while (count > 0) {
1002         int  c = (bcd[0] >> shift) & 0xf;
1003 
1004         if (c == 15 && count == 1)  /* ignore trailing 0xf */
1005             break;
1006 
1007         if (c >= 14)
1008             c = 0;
1009 
1010         if (dst) dst[result] = "0123456789*#,N"[c];
1011         result += 1;
1012 
1013         shift += 4;
1014         if (shift == 8) {
1015             shift = 0;
1016             bcd += 1;
1017         }
1018     }
1019     return  result;
1020 }
1021 
1022 
1023 int
gsm_bcdnum_from_ascii(cbytes_t ascii,int asciilen,bytes_t dst)1024 gsm_bcdnum_from_ascii( cbytes_t  ascii, int  asciilen, bytes_t  dst )
1025 {
1026     cbytes_t  end = ascii + asciilen;
1027     int  result   = 0;
1028     int  phase = 0x01;
1029 
1030     while (ascii < end) {
1031         int  c = *ascii++;
1032 
1033         if (c == '*')
1034             c = 10;
1035         else if (c == '#')
1036             c = 11;
1037         else if (c == ',')
1038             c = 12;
1039         else if (c == 'N')
1040             c = 13;
1041         else {
1042             c -= '0';
1043             if ((unsigned)c >= 10U)
1044                 return -1;
1045         }
1046         phase   = (phase << 4) | c;
1047         result += 1;
1048         if (phase & 0x100) {
1049             if (dst) dst[result/2] = (byte_t) phase;
1050             phase   = 0x01;
1051         }
1052     }
1053 
1054     if (result & 1) {
1055         if (dst) dst[result/2] = (byte_t)(phase | 0xf0);
1056     }
1057     return result;
1058 }
1059 
1060 /** ADN: Abbreviated Dialing Number
1061  **/
1062 
/* size in bytes of the fixed part found at the end of every ADN record;
 * the offsets below are relative to the start of that footer */
#define  ADN_FOOTER_SIZE     14
/* length byte of the number field */
#define  ADN_OFFSET_NUMBER_LENGTH   0
/* TON/NPI byte (the decoder below accepts 0x81 and 0x91 only) */
#define  ADN_OFFSET_TON_NPI         1
/* first byte of the BCD-encoded number */
#define  ADN_OFFSET_NUMBER_START    2
/* last byte of the BCD-encoded number */
#define  ADN_OFFSET_NUMBER_END      11
/* capability/configuration identifier byte (not used by the code in this file) */
#define  ADN_OFFSET_CAPABILITY_ID   12
/* extension record identifier byte (not used by the code in this file) */
#define  ADN_OFFSET_EXTENSION_ID    13
1070 
1071 /* see 10.5.1 of 3GPP 51.011 */
1072 static int
sim_adn_alpha_to_utf8(cbytes_t alpha,cbytes_t end,bytes_t dst)1073 sim_adn_alpha_to_utf8( cbytes_t  alpha, cbytes_t  end, bytes_t  dst )
1074 {
1075     int  result = 0;
1076 
1077     /* ignore trailing 0xff */
1078     while (alpha < end && end[-1] == 0xff)
1079         end--;
1080 
1081     if (alpha >= end)
1082         return 0;
1083 
1084     if (alpha[0] == 0x80) { /* UCS/2 source encoding */
1085         alpha += 1;
1086         result = ucs2_to_utf8( alpha, (end-alpha)/2, dst );
1087     }
1088     else
1089     {
1090         int  is_ucs2 = 0;
1091         int  len = 0, base = 0;
1092 
1093         if (alpha+3 <= end && alpha[0] == 0x81) {
1094             is_ucs2 = 1;
1095             len     = alpha[1];
1096             base    = alpha[2] << 7;
1097             alpha  += 3;
1098             if (len > end-alpha)
1099                 len = end-alpha;
1100         } else if (alpha+4 <= end && alpha[0] == 0x82) {
1101             is_ucs2 = 1;
1102             len     = alpha[1];
1103             base    = (alpha[2] << 8) | alpha[3];
1104             alpha  += 4;
1105             if (len > end-alpha)
1106                 len = end-alpha;
1107         }
1108 
1109         if (is_ucs2) {
1110             end = alpha + len;
1111             while (alpha < end) {
1112                 int  c = alpha[0];
1113                 if (c >= 0x80) {
1114                     result += utf8_write(dst, result, base + (c & 0x7f));
1115                     alpha  += 1;
1116                 } else {
1117                     /* GSM character set */
1118                     int   count;
1119                     for (count = 0; alpha+count < end && alpha[count] < 128; count++)
1120                         ;
1121                     result += utf8_from_gsm8(alpha, count, (dst ? dst+result : NULL));
1122                     alpha  += count;
1123                 }
1124             }
1125         }
1126         else {
1127             result = utf8_from_gsm8(alpha, end-alpha, dst);
1128         }
1129     }
1130     return result;
1131 }
1132 
#if 0
/* NOTE: this function is compiled out by the surrounding '#if 0'.
 *
 * Encode a UTF-8 string as an ADN alpha identifier.
 *
 * With 'dst' == NULL nothing is stored and only the size is computed.
 * Returns the number of bytes of the encoded form.
 */
static int
sim_adn_alpha_from_utf8( cbytes_t  utf8, int  utf8len, bytes_t  dst )
{
    int   result = 0;

    if (utf8_check_gsm7(utf8, utf8len)) {
        /* GSM 7-bit compatible, encode directly as 8-bit string */
        result = utf8_to_gsm8(utf8, utf8len, dst);
    } else {
        /* otherwise, simply try UCS-2 encoding, nothing more serious at the moment */
        if (dst) {
            /* 0x80 is the marker byte for the UCS-2 form */
            dst[0] = 0x80;
        }
        /* one marker byte plus two bytes per UCS-2 unit */
        result = 1 + utf8_to_ucs2(utf8, utf8len, dst ? (dst+1) : NULL)*2;
    }
    return  result;
}
#endif
1152 
1153 int
sim_adn_record_from_bytes(SimAdnRecord rec,cbytes_t data,int len)1154 sim_adn_record_from_bytes( SimAdnRecord  rec, cbytes_t  data, int  len )
1155 {
1156     cbytes_t  end    = data + len;
1157     cbytes_t  footer = end - ADN_FOOTER_SIZE;
1158     int       num_len;
1159 
1160     rec->adn.alpha[0]  = 0;
1161     rec->adn.number[0] = 0;
1162     rec->ext_record    = 0xff;
1163 
1164     if (len < ADN_FOOTER_SIZE)
1165         return -1;
1166 
1167     /* alpha is optional */
1168     if (len > ADN_FOOTER_SIZE) {
1169         cbytes_t  dataend = data + len - ADN_FOOTER_SIZE;
1170         int       count   = sim_adn_alpha_to_utf8(data, dataend, NULL);
1171 
1172         if (count > sizeof(rec->adn.alpha)-1)  /* too long */
1173             return -1;
1174 
1175         sim_adn_alpha_to_utf8(data, dataend, rec->adn.alpha);
1176         rec->adn.alpha[count] = 0;
1177     }
1178 
1179     num_len = footer[ADN_OFFSET_NUMBER_LENGTH];
1180     if (num_len > 11)
1181         return -1;
1182 
1183     /* decode TON and number to ASCII, NOTE: this is lossy !! */
1184     {
1185         int      ton    = footer[ADN_OFFSET_TON_NPI];
1186         bytes_t  number = (bytes_t) rec->adn.number;
1187         int      len    = sizeof(rec->adn.number)-1;
1188         int      count;
1189 
1190         if (ton != 0x81 && ton != 0x91)
1191             return -1;
1192 
1193         if (ton == 0x91) {
1194             *number++ = '+';
1195             len      -= 1;
1196         }
1197 
1198         count = gsm_bcdnum_to_ascii( footer + ADN_OFFSET_NUMBER_START,
1199                                      num_len*2, number );
1200         number[count] = 0;
1201     }
1202     return 0;
1203 }
1204 
1205 int
sim_adn_record_to_bytes(SimAdnRecord rec,bytes_t data,int datalen)1206 sim_adn_record_to_bytes( SimAdnRecord  rec, bytes_t   data, int  datalen )
1207 {
1208     bytes_t   end    = data + datalen;
1209     bytes_t   footer = end - ADN_FOOTER_SIZE;
1210     int       ton    = 0x81;
1211     cbytes_t  number = (cbytes_t) rec->adn.number;
1212 
1213     if (number[0] == '+') {
1214         ton     = 0x91;
1215         number += 1;
1216     }
1217     footer[0] = (strlen((const char*)number)+1)/2 + 1;
1218     /* XXXX: TODO */
1219     return 0;
1220 }
1221