• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001
2Date: Tue, 1 Jun 2010 17:27:23 -0700
3Subject: [PATCH] Enhance hyphenation dictionary reading from character buffer.
4
5Previous file reading is kept and enhanced with mmap.
6
7This is the preparation for reading the dictionary from an asset.
8
9issue: 2672163
10Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857
11---
12 hyphen.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
13 hyphen.h |    2 +
14 2 files changed, 60 insertions(+), 12 deletions(-)
15
16diff --git a/hyphen.c b/hyphen.c
17index 974d87f..446d5bd 100644
18--- a/hyphen.c
19+++ b/hyphen.c
20@@ -36,13 +36,13 @@
21  * MPL.
22  *
23  */
24+#include <fcntl.h>
25+#include <sys/mman.h>
26+#include <sys/stat.h>
27 #include <stdlib.h> /* for NULL, malloc */
28 #include <stdio.h>  /* for fprintf */
29 #include <string.h> /* for strdup */
30-
31-#ifdef UNX
32-#include <unistd.h> /* for exit */
33-#endif
34+#include <unistd.h> /* for close */
35
36 #define noVERBOSE
37
38@@ -230,12 +230,57 @@ get_state_str (int state)
39 }
40 #endif
41
42+// Get a line from the dictionary contents.
43+static char *
44+get_line (char *s, int size, const char *dict_contents, int dict_length,
45+    int *dict_ptr)
46+{
47+    int len = 0;
48+    while (len < (size - 1) && *dict_ptr < dict_length) {
49+        s[len++] = *(dict_contents + *dict_ptr);
50+        (*dict_ptr)++;
51+        if (s[len - 1] == '\n')
52+            break;
53+    }
54+    s[len] = '\0';
55+    if (len > 0) {
56+        return s;
57+    } else {
58+        return NULL;
59+    }
60+}
61+
62 HyphenDict *
63 hnj_hyphen_load (const char *fn)
64 {
65+    if (fn == NULL)
66+        return NULL;
67+    const int fd = open(fn, O_RDONLY);
68+    if (fd == -1)
69+        return NULL;
70+    struct stat sb;
71+    if (fstat(fd, &sb) == -1)  {  /* To obtain file size */
72+        close(fd);
73+        return NULL;
74+    }
75+
76+    const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
77+    if (addr == MAP_FAILED) {
78+        close(fd);
79+        return NULL;
80+    }
81+    HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);
82+    munmap((void *)addr, sb.st_size);
83+    close(fd);
84+
85+    return dict;
86+}
87+
88+HyphenDict *
89+hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
90+{
91     HyphenDict *dict[2];
92     HashTab *hashtab;
93-    FILE *f;
94     char buf[MAX_CHARS];
95     char word[MAX_CHARS];
96     char pattern[MAX_CHARS];
97@@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn)
98     HashEntry *e;
99     int nextlevel = 0;
100
101-    f = fopen (fn, "r");
102-    if (f == NULL)
103+    if (dict_contents == NULL)
104         return NULL;
105
106+    int dict_ptr = 0;
107 // loading one or two dictionaries (separated by NEXTLEVEL keyword)
108     for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
109         hashtab = hnj_hash_new ();
110@@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn)
111         /* read in character set info */
112         if (k == 0) {
113             for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
114-            fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
115+            get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
116+                dict_length, &dict_ptr);
117             for (i=0;i<MAX_NAME;i++)
118                 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
119                     dict[k]->cset[i] = 0;
120@@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn)
121             dict[k]->utf8 = dict[0]->utf8;
122         }
123
124-        while (fgets (buf, sizeof(buf), f) != NULL)
125+        while (get_line(buf, sizeof(buf), dict_contents, dict_length,
126+                &dict_ptr) != NULL)
127         {
128             if (buf[0] != '%')
129             {
130@@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn)
131 #endif
132         state_num = 0;
133     }
134-    fclose(f);
135     if (k == 2) dict[0]->nextlevel = dict[1];
136     return dict[0];
137 }
138@@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
139             hyphens2 = hnj_malloc (word_size);
140         }
141         for (i = 0; i < word_size; i++) rep2[i] = NULL;
142-        for (i = 0; i < word_size; i++) if
143-                                            (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
144+        for (i = 0; i < word_size; i++)
145+            if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
146                 if (i - begin > 1) {
147                     int hyph = 0;
148                     prep_word[i + 2] = '\0';
149diff --git a/hyphen.h b/hyphen.h
150index 5d79308..29a0701 100644
151--- a/hyphen.h
152+++ b/hyphen.h
153@@ -91,6 +91,8 @@ struct _HyphenTrans {
154 };
155
156 HyphenDict *hnj_hyphen_load (const char *fn);
157+HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
158+    int dict_length);
159 void hnj_hyphen_free (HyphenDict *dict);
160
161 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
162--
1631.7.0.1
164
165