1 #include <string.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4
5 #include "hyphen.h"
6 #include "csutil.h"
7
8 #define BUFSIZE 1000
9
/*
 * Print the command-line usage summary to stderr.
 *
 * The synopsis lists the two switches that main() actually parses,
 * "-o" and "-d".
 */
void help(void) {
  fprintf(stderr,"correct syntax is:\n");
  fprintf(stderr,"example [-o | -d] hyphen_dictionary_file file_of_words_to_check\n");
  fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
  fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
}
16
/*
 * Get a pointer to the nth 8-bit or UTF-8 character of the word.
 *
 * word: NUL-terminated string to walk.
 * n:    number of characters to skip; a value <= 0 returns word unchanged.
 * utf8: non-zero to treat bytes of the form 10xxxxxx as continuation bytes
 *       that belong to the preceding character and are skipped with it.
 *
 * Returns a pointer into word.  The walk stops at the terminating NUL, so
 * when word holds fewer than n characters the result points at the
 * terminator rather than past the end of the buffer.
 */
char * hindex(char * word, int n, int utf8) {
  int skipped;

  for (skipped = 0; skipped < n && *word != '\0'; skipped++) {
    word++;
    /* a NUL byte never matches 10xxxxxx, so this cannot overrun either */
    while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
  }
  return word;
}
27
28 /* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
single_hyphenations(char * word,char * hyphen,char ** rep,int * pos,int * cut,int utf8)29 void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
30 int i, k, j = 0;
31 char r;
32 for (i = 0; (i + 1) < strlen(word); i++) {
33 if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
34 if ((hyphen[j] & 1)) {
35 if (rep && rep[j]) {
36 k = hindex(word, j - pos[j] + 1, utf8) - word;
37 r = word[k];
38 word[k] = 0;
39 printf(" - %s%s", word, rep[j]);
40 word[k] = r;
41 printf("%s\n", hindex(word + k, cut[j], utf8));
42 } else {
43 k = hindex(word, j + 1, utf8) - word;
44 r = word[k];
45 word[k] = 0;
46 printf(" - %s=", word);
47 word[k] = r;
48 printf("%s\n", word + k);
49 }
50 }
51 j++;
52 }
53 }
54
55 int
main(int argc,char ** argv)56 main(int argc, char** argv)
57 {
58
59 HyphenDict *dict;
60 int df;
61 int wtc;
62 FILE* wtclst;
63 int k, n, i, j, c;
64 char buf[BUFSIZE + 1];
65 int nHyphCount;
66 char *hyphens;
67 char *lcword;
68 char *hyphword;
69 char hword[BUFSIZE * 2];
70 int arg = 1;
71 int optd = 1;
72 int optdd = 0;
73 char ** rep;
74 int * pos;
75 int * cut;
76
77 /* first parse the command line options */
78 /* arg1 - hyphen dictionary file, arg2 - file of words to check */
79
80 if (argv[arg]) {
81 if (strcmp(argv[arg], "-o") == 0) {
82 optd = 0;
83 arg++;
84 }
85 if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
86 optd = 1;
87 optdd = 1;
88 arg++;
89 }
90 }
91
92 if (argv[arg]) {
93 df = arg++;
94 } else {
95 help();
96 exit(1);
97 }
98
99 if (argv[arg]) {
100 wtc = arg++;
101 } else {
102 help();
103 exit(1);
104 }
105
106 /* load the hyphenation dictionary */
107 if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
108 fprintf(stderr, "Couldn't find file %s\n", argv[df]);
109 fflush(stderr);
110 exit(1);
111 }
112
113 /* open the words to check list */
114 wtclst = fopen(argv[wtc],"r");
115 if (!wtclst) {
116 fprintf(stderr,"Error - could not open file of words to check\n");
117 exit(1);
118 }
119
120
121 /* now read each word from the wtc file */
122 while(fgets(buf,BUFSIZE,wtclst)) {
123 k = strlen(buf);
124 if (buf[k - 1] == '\n') buf[k - 1] = '\0';
125 if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
126
127 /* set aside some buffers to hold lower cased */
128 /* and hyphen information */
129 lcword = (char *) malloc(k+1);
130 hyphens = (char *)malloc(k+5);
131 if (dict->utf8) {
132 strcpy(lcword, buf);
133 } else {
134 enmkallsmall(lcword,buf,dict->cset);
135 }
136
137 /* first remove any trailing periods */
138 n = k-1;
139 while((n >=0) && (lcword[n] == '.')) n--;
140 n++;
141
142 /* now actually try to hyphenate the word */
143
144 rep = NULL;
145 pos = NULL;
146 cut = NULL;
147 hword[0] = '\0';
148
149 if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
150 (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
151 free(hyphens);
152 free(lcword);
153 fprintf(stderr, "hyphenation error\n");
154 exit(1);
155 }
156
157 if (!optd) {
158 /* now backfill hyphens[] for any removed periods */
159 for (c = n; c < k; c++) hyphens[c] = '0';
160 hyphens[k] = '\0';
161
162 /* now create a new char string showing hyphenation positions */
163 /* count the hyphens and allocate space for the new hypehanted string */
164 nHyphCount = 0;
165 for (i = 0; i < n; i++)
166 if (hyphens[i]&1)
167 nHyphCount++;
168 hyphword = (char *) malloc(k+1+nHyphCount);
169 j = 0;
170 for (i = 0; i < n; i++) {
171 hyphword[j++] = buf[i];
172 if (hyphens[i]&1) {
173 hyphword[j++] = '-';
174 }
175 }
176 hyphword[j] = '\0';
177 fprintf(stdout,"%s\n",hyphword);
178 fflush(stdout);
179 free(hyphword);
180 } else {
181 fprintf(stdout,"%s\n", hword);
182 if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
183 if (rep) {
184 for (i = 0; i < n - 1; i++) {
185 if (rep[i]) free(rep[i]);
186 }
187 free(rep);
188 free(pos);
189 free(cut);
190 }
191 }
192 free(hyphens);
193 free(lcword);
194 }
195
196 fclose(wtclst);
197 hnj_hyphen_free(dict);
198 return 0;
199 }
200