1 // ImGui - binary_to_compressed_c.cpp
2 // Helper tool to turn a file into a C array, if you want to embed font data in your source code.
3
4 // The data is first compressed with stb_compress() to reduce source code size,
5 // then encoded in Base85 to fit in a string so we can fit roughly 4 bytes of compressed data into 5 bytes of source code (suggested by @mmalex)
// (If we used 32-bit constants it would take 11 bytes of source code to encode 4 bytes, and be endianness dependent)
// Note that even with compression, the output array is likely to be bigger than the binary file.
8 // Load compressed TTF fonts with ImGui::GetIO().Fonts->AddFontFromMemoryCompressedTTF()
9
10 // Build with, e.g:
11 // # cl.exe binary_to_compressed_c.cpp
12 // # gcc binary_to_compressed_c.cpp
13 // You can also find a precompiled Windows binary in the binary/demo package available from https://github.com/ocornut/imgui
14
15 // Usage:
16 // binary_to_compressed_c.exe [-base85] [-nocompress] <inputfile> <symbolname>
17 // Usage example:
18 // # binary_to_compressed_c.exe myfont.ttf MyFont > myfont.cpp
19 // # binary_to_compressed_c.exe -base85 myfont.ttf MyFont > myfont.cpp
20
21 #define _CRT_SECURE_NO_WARNINGS
22 #include <stdio.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <assert.h>
26
// stb_compress* from stb.h - declaration
// Integer and byte aliases used by all of the compressor code in this file.
typedef unsigned int stb_uint;
typedef unsigned char stb_uchar;
// Compresses `len` bytes from `in` into `out` and returns the number of bytes written to `out`.
// The output buffer is written without any bounds checking, so the caller must size it generously.
stb_uint stb_compress(stb_uchar *out,stb_uchar *in,stb_uint len);

// Reads `filename`, optionally compresses it, and prints it on stdout as C source
// (a Base85 string literal or an array of 32-bit words) whose identifiers start with `symbol`.
// Returns false when the file cannot be opened or read.
static bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression);
33
main(int argc,char ** argv)34 int main(int argc, char** argv)
35 {
36 if (argc < 3)
37 {
38 printf("Syntax: %s [-base85] [-nocompress] <inputfile> <symbolname>\n", argv[0]);
39 return 0;
40 }
41
42 int argn = 1;
43 bool use_base85_encoding = false;
44 bool use_compression = true;
45 if (argv[argn][0] == '-')
46 {
47 if (strcmp(argv[argn], "-base85") == 0) { use_base85_encoding = true; argn++; }
48 else if (strcmp(argv[argn], "-nocompress") == 0) { use_compression = false; argn++; }
49 else
50 {
51 printf("Unknown argument: '%s'\n", argv[argn]);
52 return 1;
53 }
54 }
55
56 return binary_to_compressed_c(argv[argn], argv[argn+1], use_base85_encoding, use_compression) ? 0 : 1;
57 }
58
// Returns the printable character that encodes the lowest Base85 digit of x (x % 85).
// Digits are mapped onto consecutive characters starting at '#' (35), skipping the
// backslash so the result can be placed in a string literal as-is.
char Encode85Byte(unsigned int x)
{
    unsigned int code = 35 + (x % 85);
    if (code >= '\\')
        code += 1;
    return (char)code;
}
64
binary_to_compressed_c(const char * filename,const char * symbol,bool use_base85_encoding,bool use_compression)65 bool binary_to_compressed_c(const char* filename, const char* symbol, bool use_base85_encoding, bool use_compression)
66 {
67 // Read file
68 FILE* f = fopen(filename, "rb");
69 if (!f) return false;
70 int data_sz;
71 if (fseek(f, 0, SEEK_END) || (data_sz = (int)ftell(f)) == -1 || fseek(f, 0, SEEK_SET)) { fclose(f); return false; }
72 char* data = new char[data_sz+4];
73 if (fread(data, 1, data_sz, f) != (size_t)data_sz) { fclose(f); delete[] data; return false; }
74 memset((void*)(((char*)data) + data_sz), 0, 4);
75 fclose(f);
76
77 // Compress
78 int maxlen = data_sz + 512 + (data_sz >> 2) + sizeof(int); // total guess
79 char* compressed = use_compression ? new char[maxlen] : data;
80 int compressed_sz = use_compression ? stb_compress((stb_uchar*)compressed, (stb_uchar*)data, data_sz) : data_sz;
81 if (use_compression)
82 memset(compressed + compressed_sz, 0, maxlen - compressed_sz);
83
84 // Output as Base85 encoded
85 FILE* out = stdout;
86 fprintf(out, "// File: '%s' (%d bytes)\n", filename, (int)data_sz);
87 fprintf(out, "// Exported using binary_to_compressed_c.cpp\n");
88 const char* compressed_str = use_compression ? "compressed_" : "";
89 if (use_base85_encoding)
90 {
91 fprintf(out, "static const char %s_%sdata_base85[%d+1] =\n \"", symbol, compressed_str, (int)((compressed_sz+3)/4)*5);
92 char prev_c = 0;
93 for (int src_i = 0; src_i < compressed_sz; src_i += 4)
94 {
95 // This is made a little more complicated by the fact that ??X sequences are interpreted as trigraphs by old C/C++ compilers. So we need to escape pairs of ??.
96 unsigned int d = *(unsigned int*)(compressed + src_i);
97 for (unsigned int n5 = 0; n5 < 5; n5++, d /= 85)
98 {
99 char c = Encode85Byte(d);
100 fprintf(out, (c == '?' && prev_c == '?') ? "\\%c" : "%c", c);
101 prev_c = c;
102 }
103 if ((src_i % 112) == 112-4)
104 fprintf(out, "\"\n \"");
105 }
106 fprintf(out, "\";\n\n");
107 }
108 else
109 {
110 fprintf(out, "static const unsigned int %s_%ssize = %d;\n", symbol, compressed_str, (int)compressed_sz);
111 fprintf(out, "static const unsigned int %s_%sdata[%d/4] =\n{", symbol, compressed_str, (int)((compressed_sz+3)/4)*4);
112 int column = 0;
113 for (int i = 0; i < compressed_sz; i += 4)
114 {
115 unsigned int d = *(unsigned int*)(compressed + i);
116 if ((column++ % 12) == 0)
117 fprintf(out, "\n 0x%08x, ", d);
118 else
119 fprintf(out, "0x%08x, ", d);
120 }
121 fprintf(out, "\n};\n\n");
122 }
123
124 // Cleanup
125 delete[] data;
126 if (use_compression)
127 delete[] compressed;
128 return true;
129 }
130
131 // stb_compress* from stb.h - definition
132
133 //////////////////// compressor ///////////////////////
134
stb_adler32(stb_uint adler32,stb_uchar * buffer,stb_uint buflen)135 static stb_uint stb_adler32(stb_uint adler32, stb_uchar *buffer, stb_uint buflen)
136 {
137 const unsigned long ADLER_MOD = 65521;
138 unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16;
139 unsigned long blocklen, i;
140
141 blocklen = buflen % 5552;
142 while (buflen) {
143 for (i=0; i + 7 < blocklen; i += 8) {
144 s1 += buffer[0], s2 += s1;
145 s1 += buffer[1], s2 += s1;
146 s1 += buffer[2], s2 += s1;
147 s1 += buffer[3], s2 += s1;
148 s1 += buffer[4], s2 += s1;
149 s1 += buffer[5], s2 += s1;
150 s1 += buffer[6], s2 += s1;
151 s1 += buffer[7], s2 += s1;
152
153 buffer += 8;
154 }
155
156 for (; i < blocklen; ++i)
157 s1 += *buffer++, s2 += s1;
158
159 s1 %= ADLER_MOD, s2 %= ADLER_MOD;
160 buflen -= blocklen;
161 blocklen = 5552;
162 }
163 return (s2 << 16) + s1;
164 }
165
stb_matchlen(stb_uchar * m1,stb_uchar * m2,stb_uint maxlen)166 static unsigned int stb_matchlen(stb_uchar *m1, stb_uchar *m2, stb_uint maxlen)
167 {
168 stb_uint i;
169 for (i=0; i < maxlen; ++i)
170 if (m1[i] != m2[i]) return i;
171 return i;
172 }
173
174 // simple implementation that just takes the source data in a big block
175
// Memory destination: when non-NULL, emitted bytes are stored here and the pointer advances.
static stb_uchar *stb__out;
// File destination, used by stb_out() and outliterals() only while stb__out is NULL.
static FILE *stb__outfile;
// Number of bytes written one at a time through stb__write().
static stb_uint stb__outbytes;
179
stb__write(unsigned char v)180 static void stb__write(unsigned char v)
181 {
182 fputc(v, stb__outfile);
183 ++stb__outbytes;
184 }
185
// stb_out(v): emits the low 8 bits of v, into the stb__out buffer when one is set,
// otherwise to stb__outfile via stb__write(). Written as a do/while(0) statement;
// the earlier expression form is kept below for reference:
//#define stb_out(v) (stb__out ? *stb__out++ = (stb_uchar) (v) : stb__write((stb_uchar) (v)))
#define stb_out(v) do { if (stb__out) *stb__out++ = (stb_uchar) (v); else stb__write((stb_uchar) (v)); } while (0)
188
stb_out2(stb_uint v)189 static void stb_out2(stb_uint v) { stb_out(v >> 8); stb_out(v); }
stb_out3(stb_uint v)190 static void stb_out3(stb_uint v) { stb_out(v >> 16); stb_out(v >> 8); stb_out(v); }
stb_out4(stb_uint v)191 static void stb_out4(stb_uint v) { stb_out(v >> 24); stb_out(v >> 16); stb_out(v >> 8 ); stb_out(v); }
192
outliterals(stb_uchar * in,int numlit)193 static void outliterals(stb_uchar *in, int numlit)
194 {
195 while (numlit > 65536) {
196 outliterals(in,65536);
197 in += 65536;
198 numlit -= 65536;
199 }
200
201 if (numlit == 0) ;
202 else if (numlit <= 32) stb_out (0x000020 + numlit-1);
203 else if (numlit <= 2048) stb_out2(0x000800 + numlit-1);
204 else /* numlit <= 65536) */ stb_out3(0x070000 + numlit-1);
205
206 if (stb__out) {
207 memcpy(stb__out,in,numlit);
208 stb__out += numlit;
209 } else
210 fwrite(in, 1, numlit, stb__outfile);
211 }
212
// Window size: written into the stream header by stb_compress_inner(), and the upper
// bound on match distance accepted by stb_compress_chunk().
static int stb__window = 0x40000; // 256K
214
// Decides whether a match of length `best` at distance `dist` is worth encoding:
// the farther back the match lies, the longer it has to be.
// Returns 1 when the match is acceptable, 0 otherwise.
static int stb_not_crap(int best, int dist)
{
    if (dist <= 0x00100)
        return best > 2;
    if (dist <= 0x04000)
        return best > 5;
    if (dist <= 0x80000)
        return best > 7;
    return 0;
}
221
// Number of slots in the candidate hash table allocated by stb_compress_inner(),
// which also passes stb__hashsize-1 to stb_compress_chunk() as the hash mask.
static stb_uint stb__hashsize = 32768;

// note that you can play with the hashing functions all you
// want without needing to change the decompressor
// stb__hc : shifts h left by 7, adds its top bits shifted right by 25, then adds byte q[c]
// stb__hc2: same idea with shifts of 14 and 18, mixing in the two bytes q[c] and q[d]
// stb__hc3: builds a fresh hash value from the three bytes q[c], q[d], q[e]
#define stb__hc(q,h,c) (((h) << 7) + ((h) >> 25) + q[c])
#define stb__hc2(q,h,c,d) (((h) << 14) + ((h) >> 18) + (q[c] << 7) + q[d])
#define stb__hc3(q,c,d,e) ((q[c] << 14) + (q[d] << 7) + q[e])

// Adler-32 of the input consumed so far: reset to 1 by stb_compress_inner(), updated by
// stb_compress_chunk(), and written at the very end of the stream.
static unsigned int stb__running_adler;
231
// Compresses the `length` bytes starting at `start`, emitting match opcodes (and the
// literal runs that precede them) through stb_out*/outliterals.
//   history          - unused
//   start            - first byte to compress in this call
//   end              - one past the last readable input byte; bounds hashing and match length
//   length           - number of bytes this call is responsible for
//   pending_literals - in: count of not-yet-emitted literal bytes located just before `start`;
//                      out: count of not-yet-emitted literal bytes located just before the
//                      returned position (the caller must flush them)
//   chash            - table of candidate match positions, indexed by scrambled hash
//   mask             - bit mask applied to hash values to obtain a table index
// Also folds the consumed bytes into stb__running_adler.
// Returns the number of input bytes consumed (q - start).
static int stb_compress_chunk(stb_uchar *history,
                              stb_uchar *start,
                              stb_uchar *end,
                              int length,
                              int *pending_literals,
                              stb_uchar **chash,
                              stb_uint mask)
{
    (void)history;
    int window = stb__window;
    stb_uint match_max;
    // literals carried over from before `start` are still waiting to be emitted
    stb_uchar *lit_start = start - *pending_literals;
    stb_uchar *q = start;

    // folds the high half of the hash into the low half and reduces it to a table index
    #define STB__SCRAMBLE(h) (((h) + ((h) >> 16)) & mask)

    // stop short of the end so we don't scan off the end doing
    // the hashing; this means we won't compress the last few bytes
    // unless they were part of something longer
    // (the hashing below reads q[0] through q[12], hence the q+12 < end condition)
    while (q < start+length && q+12 < end) {
        int m;
        stb_uint h1,h2,h3,h4, h;
        stb_uchar *t;
        // best = longest acceptable match found so far (2 means "none"), dist = its distance
        int best = 2, dist=0;

        // a match never extends past `end` and is never longer than 65536 bytes
        if (q+65536 > end)
            match_max = end-q;
        else
            match_max = 65536;

        // a match of length b at distance d is usable when it lies inside the window and
        // is either longer than 9 bytes or passes the length/distance test of stb_not_crap()
        #define stb__nc(b,d) ((d) <= window && ((b) > 9 || stb_not_crap(b,d)))

        // tries candidate position t: when p is non-zero the candidate is skipped if it is at
        // the same distance as the current best; otherwise the match length is measured and
        // the candidate becomes the new best if it is longer and usable per stb__nc
        #define STB__TRY(t,p) /* avoid retrying a match we already tried */ \
            if (p ? dist != q-t : 1) \
            if ((m = stb_matchlen(t, q, match_max)) > best) \
            if (stb__nc(m,q-(t))) \
            best = m, dist = q - (t)

        // rather than search for all matches, only try 4 candidate locations,
        // chosen based on 4 different hash functions of different lengths.
        // this strategy is inspired by LZO; hashing is unrolled here using the
        // 'hc' macro
        // h1 hashes q[0..2], h2 hashes q[0..4], h3 hashes q[0..8], h4 hashes q[0..12]
        h = stb__hc3(q,0, 1, 2); h1 = STB__SCRAMBLE(h);
        t = chash[h1]; if (t) STB__TRY(t,0);
        h = stb__hc2(q,h, 3, 4); h2 = STB__SCRAMBLE(h);
        h = stb__hc2(q,h, 5, 6); t = chash[h2]; if (t) STB__TRY(t,1);
        h = stb__hc2(q,h, 7, 8); h3 = STB__SCRAMBLE(h);
        h = stb__hc2(q,h, 9,10); t = chash[h3]; if (t) STB__TRY(t,1);
        h = stb__hc2(q,h,11,12); h4 = STB__SCRAMBLE(h);
        t = chash[h4]; if (t) STB__TRY(t,1);

        // because we use a shared hash table, can only update it
        // _after_ we've probed all of them
        chash[h1] = chash[h2] = chash[h3] = chash[h4] = q;

        if (best > 2)
            assert(dist > 0);

        // see if our best match qualifies
        // every match branch below first flushes the literals in [lit_start, q), then moves
        // both q and lit_start past the match before emitting the match opcode
        if (best < 3) { // fast path literals
            ++q;
        } else if (best > 2 && best <= 0x80 && dist <= 0x100) {
            // 2 bytes: 0x80 + length-1, then distance-1
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out(0x80 + best-1);
            stb_out(dist-1);
        } else if (best > 5 && best <= 0x100 && dist <= 0x4000) {
            // 3 bytes: 16-bit 0x4000 + distance-1, then length-1
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out2(0x4000 + dist-1);
            stb_out(best-1);
        } else if (best > 7 && best <= 0x100 && dist <= 0x80000) {
            // 4 bytes: 24-bit 0x180000 + distance-1, then length-1
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out3(0x180000 + dist-1);
            stb_out(best-1);
        } else if (best > 8 && best <= 0x10000 && dist <= 0x80000) {
            // 5 bytes: 24-bit 0x100000 + distance-1, then 16-bit length-1
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            stb_out3(0x100000 + dist-1);
            stb_out2(best-1);
        } else if (best > 9 && dist <= 0x1000000) {
            // long-distance forms: opcode byte, 24-bit distance-1, then 8- or 16-bit length-1
            if (best > 65536) best = 65536;
            outliterals(lit_start, q-lit_start); lit_start = (q += best);
            if (best <= 0x100) {
                stb_out(0x06);
                stb_out3(dist-1);
                stb_out(best-1);
            } else {
                stb_out(0x04);
                stb_out3(dist-1);
                stb_out2(best-1);
            }
        } else { // fallback literals if no match was a balanced tradeoff
            ++q;
        }
    }

    // if we didn't get all the way, add the rest to literals
    if (q-start < length)
        q = start+length;

    // the literals are everything from lit_start to q
    *pending_literals = (q - lit_start);

    // checksum covers every input byte consumed by this call
    stb__running_adler = stb_adler32(stb__running_adler, start, q - start);
    return q - start;
}
336
stb_compress_inner(stb_uchar * input,stb_uint length)337 static int stb_compress_inner(stb_uchar *input, stb_uint length)
338 {
339 int literals = 0;
340 stb_uint len,i;
341
342 stb_uchar **chash;
343 chash = (stb_uchar**) malloc(stb__hashsize * sizeof(stb_uchar*));
344 if (chash == NULL) return 0; // failure
345 for (i=0; i < stb__hashsize; ++i)
346 chash[i] = NULL;
347
348 // stream signature
349 stb_out(0x57); stb_out(0xbc);
350 stb_out2(0);
351
352 stb_out4(0); // 64-bit length requires 32-bit leading 0
353 stb_out4(length);
354 stb_out4(stb__window);
355
356 stb__running_adler = 1;
357
358 len = stb_compress_chunk(input, input, input+length, length, &literals, chash, stb__hashsize-1);
359 assert(len == length);
360
361 outliterals(input+length - literals, literals);
362
363 free(chash);
364
365 stb_out2(0x05fa); // end opcode
366
367 stb_out4(stb__running_adler);
368
369 return 1; // success
370 }
371
stb_compress(stb_uchar * out,stb_uchar * input,stb_uint length)372 stb_uint stb_compress(stb_uchar *out, stb_uchar *input, stb_uint length)
373 {
374 stb__out = out;
375 stb__outfile = NULL;
376
377 stb_compress_inner(input, length);
378
379 return stb__out - out;
380 }
381