1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picokfst.c
18 *
19 * FST knowledge loading and access
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28 #include "picoos.h"
29 #include "picodbg.h"
30 #include "picoknow.h"
31 #include "picokfst.h"
32
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 #if 0
37 }
38 #endif
39
40
41 #define FileHdrSize 4 /* size of FST file header */
42
43
44
45 /* ************************************************************/
46 /* function to create specialized kb, */
47 /* to be used by picorsrc only */
48 /* ************************************************************/
49
50 /** object : FSTKnowledgeBase
51 * shortcut : kfst
52 * derived from : picoknow_KnowledgeBase
53 */
54
55 typedef struct kfst_subobj * kfst_SubObj;
56
57 typedef struct kfst_subobj{
58 picoos_uint8 * fstStream; /* the byte stream base address */
59 picoos_int32 hdrLen; /* length of file header */
60 picoos_int32 transductionMode; /* transduction mode to be used for FST */
61 picoos_int32 nrClasses; /* nr of pair/transition classes in FST; class is in [1..nrClasses] */
62 picoos_int32 nrStates; /* nr of states in FST; state is in [1..nrState] */
63 picoos_int32 termClass; /* pair class of terminator symbol pair; probably obsolete */
64 picoos_int32 alphaHashTabSize; /* size of pair alphabet hash table */
65 picoos_int32 alphaHashTabPos; /* absolute address of the start of the pair alphabet */
66 picoos_int32 transTabEntrySize; /* size in bytes of each transition table entry */
67 picoos_int32 transTabPos; /* absolute address of the start of the transition table */
68 picoos_int32 inEpsStateTabPos; /* absolute address of the start of the input epsilon transition table */
69 picoos_int32 accStateTabPos; /* absolute address of the table of accepting states */
70 } kfst_subobj_t;
71
72
73
74 /* ************************************************************/
75 /* primitives for reading from byte stream */
76 /* ************************************************************/
77
78 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into unsigned number 'num'.
79 '*pos' is modified to the position right after the number */
FixedBytesToUnsignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_uint32 * num)80 static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num)
81 {
82 picoos_int32 i;
83
84 (*num) = 0;
85 for (i = 0; i < nrBytes; i++) {
86 (*num) = ((*num) << 8) + (picoos_uint32)stream[*pos];
87 (*pos)++;
88 }
89 }
90
91
92 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into signed number 'num'.
93 '*pos' is modified to the position right after the number */
FixedBytesToSignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_int32 * num)94 static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num)
95 {
96 picoos_int32 i;
97 picoos_uint32 val;
98
99 val = 0;
100 for (i = 0; i < nrBytes; i++) {
101 val = (val << 8) + (picoos_uint32)stream[*pos];
102 (*pos)++;
103 }
104 if (val % 2 == 1) {
105 /* negative number */
106 (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
107 } else {
108 /* positive number */
109 (*num) = val / 2;
110 }
111 }
112
113
114 /* Converts varying-sized sequence of bytes starting at position '*pos' in byte stream 'stream'
115 into (signed) number 'num'. '*pos' is modified to the position right after the number. */
BytesToNum(picoos_uint8 * stream,picoos_uint32 * pos,picoos_int32 * num)116 static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num)
117 {
118 picoos_uint32 val;
119 picoos_uint32 b;
120
121 val = 0;
122 b = (picoos_uint32)stream[*pos];
123 (*pos)++;
124 while (b < 128) {
125 val = (val << 7) + b;
126 b = (picoos_uint32)stream[*pos];
127 (*pos)++;
128 }
129 val = (val << 7) + (b - 128);
130 if (val % 2 == 1) {
131 /* negative number */
132 (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
133 } else {
134 /* positive number */
135 (*num) = val / 2;
136 }
137 }
138
139
140 /* ************************************************************/
141 /* setting up FST from byte stream */
142 /* ************************************************************/
143
kfstInitialize(register picoknow_KnowledgeBase this,picoos_Common common)144 static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this,
145 picoos_Common common)
146 {
147 picoos_uint32 curpos;
148 picoos_int32 offs;
149 kfst_subobj_t * kfst;
150
151 PICODBG_DEBUG(("kfstInitialize -- start\n"));
152
153 if (NULL == this || NULL == this->subObj) {
154 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL,
155 NULL);
156 }
157 kfst = (kfst_subobj_t *) this->subObj;
158
159 /* +CT+ */
160 kfst->fstStream = this->base;
161 PICODBG_TRACE(("base: %d\n",this->base));
162 kfst->hdrLen = FileHdrSize;
163 curpos = kfst->hdrLen;
164 BytesToNum(kfst->fstStream,& curpos,& kfst->transductionMode);
165 BytesToNum(kfst->fstStream,& curpos,& kfst->nrClasses);
166 BytesToNum(kfst->fstStream,& curpos,& kfst->nrStates);
167 BytesToNum(kfst->fstStream,& curpos,& kfst->termClass);
168 BytesToNum(kfst->fstStream,& curpos,& kfst->alphaHashTabSize);
169 BytesToNum(kfst->fstStream,& curpos,& offs);
170 kfst->alphaHashTabPos = kfst->hdrLen + offs;
171 BytesToNum(kfst->fstStream,& curpos,& kfst->transTabEntrySize);
172 BytesToNum(kfst->fstStream,& curpos,& offs);
173 kfst->transTabPos = kfst->hdrLen + offs;
174 BytesToNum(kfst->fstStream,& curpos,& offs);
175 kfst->inEpsStateTabPos = kfst->hdrLen + offs;
176 BytesToNum(kfst->fstStream,& curpos,& offs);
177 kfst->accStateTabPos = kfst->hdrLen + offs;
178 /* -CT- */
179
180 return PICO_OK;
181 }
182
183
kfstSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)184 static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this,
185 picoos_MemoryManager mm)
186 {
187 if (NULL != this) {
188 picoos_deallocate(mm, (void *) &this->subObj);
189 }
190 return PICO_OK;
191 }
192
193
194 /* calculates a small number of data (e.g. addresses) from kb for fast access.
195 * This data is encapsulated in a picokfst_FST that can later be retrieved
196 * with picokfst_getFST. */
picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common)197 pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
198 picoos_Common common)
199 {
200 pico_status_t status;
201
202 if (NULL == this) {
203 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL);
204 }
205 if (0 < this->size) {
206 /* not a dummy kb */
207 this->subDeallocate = kfstSubObjDeallocate;
208
209 this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t));
210
211 if (NULL == this->subObj) {
212 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
213 }
214 status = kfstInitialize(this, common);
215 if (PICO_OK != status) {
216 picoos_deallocate(common->mm,(void **)&this->subObj);
217 }
218 }
219 return PICO_OK;
220 }
221
222
223 /* ************************************************************/
224 /* FST type and getFST function */
225 /* ************************************************************/
226
227
228
229 /* return kb FST for usage in PU */
picokfst_getFST(picoknow_KnowledgeBase this)230 picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this)
231 {
232 if (NULL == this) {
233 return NULL;
234 } else {
235 return (picokfst_FST) this->subObj;
236 }
237 }
238
239
240
241 /* ************************************************************/
242 /* FST access methods */
243 /* ************************************************************/
244
245
246 /* see description in header file */
picokfst_kfstGetTransductionMode(picokfst_FST this)247 extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this)
248 {
249 kfst_SubObj fst = (kfst_SubObj) this;
250 if (fst != NULL) {
251 return fst->transductionMode;
252 } else {
253 return 0;
254 }
255 }
256
257
258 /* see description in header file */
picokfst_kfstGetFSTSizes(picokfst_FST this,picoos_int32 * nrStates,picoos_int32 * nrClasses)259 extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses)
260 {
261 kfst_SubObj fst = (kfst_SubObj) this;
262 if (fst != NULL) {
263 *nrStates = fst->nrStates;
264 *nrClasses = fst->nrClasses;
265 } else {
266 *nrStates = 0;
267 *nrClasses = 0;
268 }
269 }
270
271 /* see description in header file */
picokfst_kfstStartPairSearch(picokfst_FST this,picokfst_symid_t inSym,picoos_bool * inSymFound,picoos_int32 * searchState)272 extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
273 picoos_bool * inSymFound, picoos_int32 * searchState)
274 {
275 picoos_uint32 pos;
276 picoos_int32 offs;
277 picoos_int32 h;
278 picoos_int32 inSymCellPos;
279 picoos_int32 inSymX;
280 picoos_int32 nextSameHashInSymOffs;
281
282 kfst_SubObj fst = (kfst_SubObj) this;
283 (*searchState) = -1;
284 (*inSymFound) = 0;
285 h = inSym % fst->alphaHashTabSize;
286 pos = fst->alphaHashTabPos + (h * 4);
287 FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
288 if (offs > 0) {
289 inSymCellPos = fst->alphaHashTabPos + offs;
290 pos = inSymCellPos;
291 BytesToNum(fst->fstStream,& pos,& inSymX);
292 BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
293 while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) {
294 inSymCellPos = inSymCellPos + nextSameHashInSymOffs;
295 pos = inSymCellPos;
296 BytesToNum(fst->fstStream,& pos,& inSymX);
297 BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
298 }
299 if (inSymX == inSym) {
300 /* input symbol found; state is set to position after symbol cell */
301 (*searchState) = pos;
302 (*inSymFound) = 1;
303 }
304 }
305 }
306
307
308 /* see description in header file */
picokfst_kfstGetNextPair(picokfst_FST this,picoos_int32 * searchState,picoos_bool * pairFound,picokfst_symid_t * outSym,picokfst_class_t * pairClass)309 extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
310 picoos_bool * pairFound,
311 picokfst_symid_t * outSym, picokfst_class_t * pairClass)
312 {
313 picoos_uint32 pos;
314 picoos_int32 val;
315
316 kfst_SubObj fst = (kfst_SubObj) this;
317 if ((*searchState) < 0) {
318 (*pairFound) = 0;
319 (*outSym) = PICOKFST_SYMID_ILLEG;
320 (*pairClass) = -1;
321 } else {
322 pos = (*searchState);
323 BytesToNum(fst->fstStream,& pos,& val);
324 *outSym = (picokfst_symid_t)val;
325 if ((*outSym) != PICOKFST_SYMID_ILLEG) {
326 BytesToNum(fst->fstStream,& pos,& val);
327 *pairClass = (picokfst_class_t)val;
328 (*pairFound) = 1;
329 (*searchState) = pos;
330 } else {
331 (*pairFound) = 0;
332 (*outSym) = PICOKFST_SYMID_ILLEG;
333 (*pairClass) = -1;
334 (*searchState) = -1;
335 }
336 }
337 }
338
339
340
341 /* see description in header file */
picokfst_kfstGetTrans(picokfst_FST this,picokfst_state_t startState,picokfst_class_t transClass,picokfst_state_t * endState)342 extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
343 picokfst_state_t * endState)
344 {
345
346 picoos_uint32 pos;
347 picoos_int32 index;
348 picoos_uint32 endStateX;
349
350 kfst_SubObj fst = (kfst_SubObj) this;
351 if ((startState < 1) || (startState > fst->nrStates) || (transClass < 1) || (transClass > fst->nrClasses)) {
352 (*endState) = 0;
353 } else {
354 index = (startState - 1) * fst->nrClasses + transClass - 1;
355 pos = fst->transTabPos + (index * fst->transTabEntrySize);
356 FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX);
357 (*endState) = endStateX;
358 }
359 }
360
361
362 /* see description in header file */
picokfst_kfstStartInEpsTransSearch(picokfst_FST this,picokfst_state_t startState,picoos_bool * inEpsTransFound,picoos_int32 * searchState)363 extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
364 picoos_bool * inEpsTransFound, picoos_int32 * searchState)
365 {
366
367 picoos_int32 offs;
368 picoos_uint32 pos;
369
370 kfst_SubObj fst = (kfst_SubObj) this;
371 (*searchState) = -1;
372 (*inEpsTransFound) = 0;
373 if ((startState > 0) && (startState <= fst->nrStates)) {
374 pos = fst->inEpsStateTabPos + (startState - 1) * 4;
375 FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
376 if (offs > 0) {
377 (*searchState) = fst->inEpsStateTabPos + offs;
378 (*inEpsTransFound) = 1;
379 }
380 }
381 }
382
383
384
385 /* see description in header file */
picokfst_kfstGetNextInEpsTrans(picokfst_FST this,picoos_int32 * searchState,picoos_bool * inEpsTransFound,picokfst_symid_t * outSym,picokfst_state_t * endState)386 extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
387 picoos_bool * inEpsTransFound,
388 picokfst_symid_t * outSym, picokfst_state_t * endState)
389 {
390 picoos_uint32 pos;
391 picoos_int32 val;
392
393 kfst_SubObj fst = (kfst_SubObj) this;
394 if ((*searchState) < 0) {
395 (*inEpsTransFound) = 0;
396 (*outSym) = PICOKFST_SYMID_ILLEG;
397 (*endState) = 0;
398 } else {
399 pos = (*searchState);
400 BytesToNum(fst->fstStream,& pos,& val);
401 *outSym = (picokfst_symid_t)val;
402 if ((*outSym) != PICOKFST_SYMID_ILLEG) {
403 BytesToNum(fst->fstStream,& pos,& val);
404 *endState = (picokfst_state_t)val;
405 (*inEpsTransFound) = 1;
406 (*searchState) = pos;
407 } else {
408 (*inEpsTransFound) = 0;
409 (*outSym) = PICOKFST_SYMID_ILLEG;
410 (*endState) = 0;
411 (*searchState) = -1;
412 }
413 }
414 }
415
416
417 /* see description in header file */
picokfst_kfstIsAcceptingState(picokfst_FST this,picokfst_state_t state)418 extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state)
419 {
420
421 picoos_uint32 pos;
422 picoos_uint32 val;
423
424 kfst_SubObj fst = (kfst_SubObj) this;
425 if ((state > 0) && (state <= fst->nrStates)) {
426 pos = fst->accStateTabPos + (state - 1);
427 FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val);
428 return (val == 1);
429 } else {
430 return 0;
431 }
432 }
433
434 #ifdef __cplusplus
435 }
436 #endif
437
438 /* End picofst.c */
439