• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* unicode.c - convert between Unicode and UTF-8
2  *
3  * Copyright 2020 The Android Open Source Project.
4  *
5  * Loosely based on the Plan9/Inferno unicode(1).
6 
7 USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
8 
9 config UNICODE
10   bool "unicode"
11   default n
12   help
13     usage: unicode [[min]-max]
14 
15     Convert between Unicode code points and UTF-8, in both directions.
16 */
17 
18 #define FOR_unicode
19 #include "toys.h"
20 
codepoint(unsigned wc)21 static void codepoint(unsigned wc) {
22   char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
23             "NAKSYNETBCANEM SUBESCFS GS RS US ";
24   unsigned n, i;
25 
26   printf("U+%04X : ", wc);
27   if (wc < ' ') printf("%.3s", low+(wc*3));
28   else if (wc == 0x7f) printf("DEL");
29   else {
30     toybuf[n = wctoutf8(toybuf, wc)] = 0;
31     printf("%s%s", toybuf, n>1 ? " :":"");
32     if (n>1) for (i = 0; i < n; i++) printf(" %#02x", toybuf[i]);
33   }
34   xputc('\n');
35 }
36 
unicode_main(void)37 void unicode_main(void)
38 {
39   unsigned from, to;
40   char next, **args;
41 
42   for (args = toys.optargs; *args; args++) {
43     // unicode 660-666 => table of `U+0600 : ٠ : 0xd9 0xa0` etc.
44     if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) {
45       while (from <= to) codepoint(from++);
46 
47     // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`.
48     } else if (sscanf(*args, "%x%c", &from, &next) == 1) {
49       codepoint(from);
50 
51     // unicode hello => table showing every character in the string.
52     } else {
53       char *s = *args;
54       size_t l = strlen(s);
55       wchar_t wc;
56       int n;
57 
58       while ((n = utf8towc(&wc, s, l)) > 0) {
59         codepoint(wc);
60         s += n;
61         l -= n;
62       }
63     }
64   }
65 }
66