• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <string.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 
5 #include "hyphen.h"
6 #include "csutil.h"
7 
8 #define BUFSIZE 1000
9 
/*
 * Print the command-line usage summary to stderr.
 *
 * The synopsis names the two switches the option parser in this file
 * recognises: -o and -d.
 */
void help(void) {
    fprintf(stderr, "correct syntax is:\n");
    fprintf(stderr, "example [-o | -d] hyphen_dictionary_file file_of_words_to_check\n");
    fprintf(stderr, "-o = use old algorithm (without non-standard hyphenation)\n");
    fprintf(stderr, "-d = hyphenation with listing of the possible hyphenations\n");
}
16 
/*
 * Return a pointer to the n-th character (0-based) of word.
 *
 * When utf8 is non-zero, a character is one byte plus any continuation bytes
 * (bit pattern 10xxxxxx) that follow it; otherwise every byte is a character.
 * A zero or negative n returns word itself.  The walk stops at the
 * terminating NUL, so asking for a character beyond the end returns a pointer
 * to the terminator instead of reading past the buffer.
 */
char * hindex(char * word, int n, int utf8) {
    int j;
    for (j = 0; j < n && *word != '\0'; j++) {
        word++;
        /* skip the continuation bytes that belong to the character just passed */
        while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
    }
    return word;
}
27 
28 /* list possible hyphenations with -dd option (example for the usage of the hyphenate2() function) */
single_hyphenations(char * word,char * hyphen,char ** rep,int * pos,int * cut,int utf8)29 void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
30     int i, k, j = 0;
31     char r;
32     for (i = 0; (i + 1) < strlen(word); i++) {
33         if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
34         if ((hyphen[j] & 1)) {
35             if (rep && rep[j]) {
36               k = hindex(word, j - pos[j] + 1, utf8) - word;
37               r = word[k];
38               word[k] = 0;
39               printf(" - %s%s", word, rep[j]);
40               word[k] = r;
41               printf("%s\n", hindex(word + k, cut[j], utf8));
42             } else {
43               k = hindex(word, j + 1, utf8) - word;
44               r = word[k];
45               word[k] = 0;
46               printf(" - %s=", word);
47               word[k] = r;
48               printf("%s\n", word + k);
49             }
50         }
51         j++;
52     }
53 }
54 
55 int
main(int argc,char ** argv)56 main(int argc, char** argv)
57 {
58 
59     HyphenDict *dict;
60     int df;
61     int wtc;
62     FILE* wtclst;
63     int k, n, i, j, c;
64     char buf[BUFSIZE + 1];
65     int  nHyphCount;
66     char *hyphens;
67     char *lcword;
68     char *hyphword;
69     char hword[BUFSIZE * 2];
70     int arg = 1;
71     int optd = 1;
72     int optdd = 0;
73     char ** rep;
74     int * pos;
75     int * cut;
76 
77   /* first parse the command line options */
78   /* arg1 - hyphen dictionary file, arg2 - file of words to check */
79 
80   if (argv[arg]) {
81        if (strcmp(argv[arg], "-o") == 0) {
82             optd = 0;
83             arg++;
84        }
85        if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
86             optd = 1;
87             optdd = 1;
88             arg++;
89        }
90   }
91 
92   if (argv[arg]) {
93        df = arg++;
94   } else {
95     help();
96     exit(1);
97   }
98 
99   if (argv[arg]) {
100        wtc = arg++;
101   } else {
102     help();
103     exit(1);
104   }
105 
106   /* load the hyphenation dictionary */
107   if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
108        fprintf(stderr, "Couldn't find file %s\n", argv[df]);
109        fflush(stderr);
110        exit(1);
111   }
112 
113   /* open the words to check list */
114   wtclst = fopen(argv[wtc],"r");
115   if (!wtclst) {
116     fprintf(stderr,"Error - could not open file of words to check\n");
117     exit(1);
118   }
119 
120 
121   /* now read each word from the wtc file */
122     while(fgets(buf,BUFSIZE,wtclst)) {
123        k = strlen(buf);
124        if (buf[k - 1] == '\n') buf[k - 1] = '\0';
125        if (*buf && buf[k - 2] == '\r') buf[k-- - 2] = '\0';
126 
127        /* set aside some buffers to hold lower cased */
128        /* and hyphen information */
129        lcword = (char *) malloc(k+1);
130        hyphens = (char *)malloc(k+5);
131        if (dict->utf8) {
132          strcpy(lcword, buf);
133        } else {
134          enmkallsmall(lcword,buf,dict->cset);
135        }
136 
137        /* first remove any trailing periods */
138        n = k-1;
139        while((n >=0) && (lcword[n] == '.')) n--;
140        n++;
141 
142        /* now actually try to hyphenate the word */
143 
144        rep = NULL;
145        pos = NULL;
146        cut = NULL;
147        hword[0] = '\0';
148 
149        if ((!optd && hnj_hyphen_hyphenate(dict, lcword, n-1, hyphens)) ||
150 	    (optd && hnj_hyphen_hyphenate2(dict, lcword, n-1, hyphens, hword, &rep, &pos, &cut))) {
151              free(hyphens);
152              free(lcword);
153              fprintf(stderr, "hyphenation error\n");
154              exit(1);
155        }
156 
157        if (!optd) {
158          /* now backfill hyphens[] for any removed periods */
159          for (c = n; c < k; c++) hyphens[c] = '0';
160          hyphens[k] = '\0';
161 
162          /* now create a new char string showing hyphenation positions */
163          /* count the hyphens and allocate space for the new hypehanted string */
164          nHyphCount = 0;
165          for (i = 0; i < n; i++)
166            if (hyphens[i]&1)
167              nHyphCount++;
168          hyphword = (char *) malloc(k+1+nHyphCount);
169          j = 0;
170          for (i = 0; i < n; i++) {
171 	   hyphword[j++] = buf[i];
172            if (hyphens[i]&1) {
173 	      hyphword[j++] = '-';
174 	   }
175          }
176          hyphword[j] = '\0';
177          fprintf(stdout,"%s\n",hyphword);
178          fflush(stdout);
179          free(hyphword);
180       } else {
181          fprintf(stdout,"%s\n", hword);
182          if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
183          if (rep) {
184             for (i = 0; i < n - 1; i++) {
185                 if (rep[i]) free(rep[i]);
186             }
187             free(rep);
188             free(pos);
189             free(cut);
190          }
191       }
192       free(hyphens);
193       free(lcword);
194     }
195 
196     fclose(wtclst);
197     hnj_hyphen_free(dict);
198     return 0;
199 }
200