1From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001 2Date: Tue, 1 Jun 2010 17:27:23 -0700 3Subject: [PATCH] Enhance hyphenation dictionary reading from character buffer. 4 5Previous file reading is kept and enhanced with mmap. 6 7This is the preparation for reading the dictionary from an asset. 8 9issue: 2672163 10Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857 11--- 12 hyphen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- 13 hyphen.h | 2 + 14 2 files changed, 60 insertions(+), 12 deletions(-) 15 16diff --git a/hyphen.c b/hyphen.c 17index 974d87f..446d5bd 100644 18--- a/hyphen.c 19+++ b/hyphen.c 20@@ -36,13 +36,13 @@ 21 * MPL. 22 * 23 */ 24+#include <fcntl.h> 25+#include <sys/mman.h> 26+#include <sys/stat.h> 27 #include <stdlib.h> /* for NULL, malloc */ 28 #include <stdio.h> /* for fprintf */ 29 #include <string.h> /* for strdup */ 30- 31-#ifdef UNX 32-#include <unistd.h> /* for exit */ 33-#endif 34+#include <unistd.h> /* for close */ 35 36 #define noVERBOSE 37 38@@ -230,12 +230,57 @@ get_state_str (int state) 39 } 40 #endif 41 42+// Get a line from the dictionary contents. 
43+static char * 44+get_line (char *s, int size, const char *dict_contents, int dict_length, 45+ int *dict_ptr) 46+{ 47+ int len = 0; 48+ while (len < (size - 1) && *dict_ptr < dict_length) { 49+ s[len++] = *(dict_contents + *dict_ptr); 50+ (*dict_ptr)++; 51+ if (s[len - 1] == '\n') 52+ break; 53+ } 54+ s[len] = '\0'; 55+ if (len > 0) { 56+ return s; 57+ } else { 58+ return NULL; 59+ } 60+} 61+ 62 HyphenDict * 63 hnj_hyphen_load (const char *fn) 64 { 65+ if (fn == NULL) 66+ return NULL; 67+ const int fd = open(fn, O_RDONLY); 68+ if (fd == -1) 69+ return NULL; 70+ struct stat sb; 71+ if (fstat(fd, &sb) == -1) { /* To obtain file size */ 72+ close(fd); 73+ return NULL; 74+ } 75+ 76+ const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 77+ if (addr == MAP_FAILED) { 78+ close(fd); 79+ return NULL; 80+ } 81+ HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size); 82+ munmap((void *)addr, sb.st_size); 83+ close(fd); 84+ 85+ return dict; 86+} 87+ 88+HyphenDict * 89+hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length) 90+{ 91 HyphenDict *dict[2]; 92 HashTab *hashtab; 93- FILE *f; 94 char buf[MAX_CHARS]; 95 char word[MAX_CHARS]; 96 char pattern[MAX_CHARS]; 97@@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn) 98 HashEntry *e; 99 int nextlevel = 0; 100 101- f = fopen (fn, "r"); 102- if (f == NULL) 103+ if (dict_contents == NULL) 104 return NULL; 105 106+ int dict_ptr = 0; 107 // loading one or two dictionaries (separated by NEXTLEVEL keyword) 108 for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 109 hashtab = hnj_hash_new (); 110@@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn) 111 /* read in character set info */ 112 if (k == 0) { 113 for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; 114- fgets(dict[k]->cset, sizeof(dict[k]->cset),f); 115+ get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents, 116+ dict_length, &dict_ptr); 117 for (i=0;i<MAX_NAME;i++) 118 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == 
'\n')) 119 dict[k]->cset[i] = 0; 120@@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn) 121 dict[k]->utf8 = dict[0]->utf8; 122 } 123 124- while (fgets (buf, sizeof(buf), f) != NULL) 125+ while (get_line(buf, sizeof(buf), dict_contents, dict_length, 126+ &dict_ptr) != NULL) 127 { 128 if (buf[0] != '%') 129 { 130@@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn) 131 #endif 132 state_num = 0; 133 } 134- fclose(f); 135 if (k == 2) dict[0]->nextlevel = dict[1]; 136 return dict[0]; 137 } 138@@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, 139 hyphens2 = hnj_malloc (word_size); 140 } 141 for (i = 0; i < word_size; i++) rep2[i] = NULL; 142- for (i = 0; i < word_size; i++) if 143- (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 144+ for (i = 0; i < word_size; i++) 145+ if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { 146 if (i - begin > 1) { 147 int hyph = 0; 148 prep_word[i + 2] = '\0'; 149diff --git a/hyphen.h b/hyphen.h 150index 5d79308..29a0701 100644 151--- a/hyphen.h 152+++ b/hyphen.h 153@@ -91,6 +91,8 @@ struct _HyphenTrans { 154 }; 155 156 HyphenDict *hnj_hyphen_load (const char *fn); 157+HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents, 158+ int dict_length); 159 void hnj_hyphen_free (HyphenDict *dict); 160 161 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ 162-- 1631.7.0.1 164 165