1 #include <stdio.h>
2 #include <sys/stat.h>
3 #include <string.h>
4
5 #include "unicode/locid.h"
6 #include "utils/Log.h"
7
8 #include <vector>
9 #include <minikin/Hyphenator.h>
10
11 using minikin::HyphenationType;
12 using minikin::Hyphenator;
13
loadHybFile(const char * fn,int minPrefix,int minSuffix)14 Hyphenator* loadHybFile(const char* fn, int minPrefix, int minSuffix) {
15 struct stat statbuf;
16 int status = stat(fn, &statbuf);
17 if (status < 0) {
18 fprintf(stderr, "error opening %s\n", fn);
19 return nullptr;
20 }
21 size_t size = statbuf.st_size;
22 FILE* f = fopen(fn, "rb");
23 if (f == NULL) {
24 fprintf(stderr, "error opening %s\n", fn);
25 return nullptr;
26 }
27 uint8_t* buf = new uint8_t[size];
28 size_t read_size = fread(buf, 1, size, f);
29 fclose(f);
30 if (read_size < size) {
31 fprintf(stderr, "error reading %s\n", fn);
32 delete[] buf;
33 return nullptr;
34 }
35 return Hyphenator::loadBinary(buf, minPrefix, minSuffix);
36 }
37
main(int argc,char ** argv)38 int main(int argc, char** argv) {
39 Hyphenator* hyph = loadHybFile("/tmp/en.hyb", 2, 3); // should also be configurable
40 std::vector<HyphenationType> result;
41 std::vector<uint16_t> word;
42 if (argc < 2) {
43 fprintf(stderr, "usage: hyphtool word\n");
44 return 1;
45 }
46 char* asciiword = argv[1];
47 size_t len = strlen(asciiword);
48 for (size_t i = 0; i < len; i++) {
49 uint32_t c = asciiword[i];
50 if (c == '-') {
51 c = 0x00AD;
52 }
53 // ASCII (or possibly ISO Latin 1), but kinda painful to do utf conversion :(
54 word.push_back(c);
55 }
56 hyph->hyphenate(&result, word.data(), word.size(), icu::Locale::getUS());
57 for (size_t i = 0; i < len; i++) {
58 if (result[i] != HyphenationType::DONT_BREAK) {
59 printf("-");
60 }
61 printf("%c", word[i]);
62 }
63 printf("\n");
64 return 0;
65 }
66