• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *     http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  /**
17   * @file picokfst.c
18   *
19   * FST knowledge loading and access
20   *
21   * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22   * All rights reserved.
23   *
24   * History:
25   * - 2009-04-20 -- initial version
26   *
27   */
28  #include "picoos.h"
29  #include "picodbg.h"
30  #include "picoknow.h"
31  #include "picokfst.h"
32  
33  #ifdef __cplusplus
34  extern "C" {
35  #endif
36  #if 0
37  }
38  #endif
39  
40  
41  #define FileHdrSize 4       /* size of FST file header */
42  
43  
44  
45  /* ************************************************************/
46  /* function to create specialized kb, */
47  /* to be used by picorsrc only */
48  /* ************************************************************/
49  
50  /** object       : FSTKnowledgeBase
51   *  shortcut     : kfst
52   *  derived from : picoknow_KnowledgeBase
53   */
54  
55  typedef struct kfst_subobj * kfst_SubObj;
56  
57  typedef struct kfst_subobj{
58      picoos_uint8 * fstStream;         /* the byte stream base address */
59      picoos_int32 hdrLen;              /* length of file header */
60      picoos_int32 transductionMode;    /* transduction mode to be used for FST */
61      picoos_int32 nrClasses;           /* nr of pair/transition classes in FST; class is in [1..nrClasses] */
62      picoos_int32 nrStates;            /* nr of states in FST; state is in [1..nrState] */
63      picoos_int32 termClass;           /* pair class of terminator symbol pair; probably obsolete */
64      picoos_int32 alphaHashTabSize;    /* size of pair alphabet hash table */
65      picoos_int32 alphaHashTabPos;     /* absolute address of the start of the pair alphabet */
66      picoos_int32 transTabEntrySize;   /* size in bytes of each transition table entry */
67      picoos_int32 transTabPos;         /* absolute address of the start of the transition table */
68      picoos_int32 inEpsStateTabPos;    /* absolute address of the start of the input epsilon transition table */
69      picoos_int32 accStateTabPos;      /* absolute address of the table of accepting states */
70  } kfst_subobj_t;
71  
72  
73  
74  /* ************************************************************/
75  /* primitives for reading from byte stream */
76  /* ************************************************************/
77  
78  /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into unsigned number 'num'.
79     '*pos' is modified to the position right after the number */
FixedBytesToUnsignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_uint32 * num)80  static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num)
81  {
82      picoos_int32 i;
83  
84      (*num) = 0;
85      for (i = 0; i < nrBytes; i++) {
86          (*num) = ((*num) << 8) + (picoos_uint32)stream[*pos];
87          (*pos)++;
88      }
89  }
90  
91  
92  /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into signed number 'num'.
93     '*pos' is modified to the position right after the number */
FixedBytesToSignedNum(picoos_uint8 * stream,picoos_uint8 nrBytes,picoos_uint32 * pos,picoos_int32 * num)94  static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num)
95  {
96      picoos_int32 i;
97      picoos_uint32 val;
98  
99      val = 0;
100      for (i = 0; i < nrBytes; i++) {
101          val = (val << 8) + (picoos_uint32)stream[*pos];
102          (*pos)++;
103      }
104      if (val % 2 == 1) {
105          /* negative number */
106          (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
107      } else {
108          /* positive number */
109          (*num) = val / 2;
110      }
111  }
112  
113  
114  /* Converts varying-sized sequence of bytes starting at position '*pos' in byte stream 'stream'
115     into (signed) number 'num'. '*pos' is modified to the position right after the number. */
BytesToNum(picoos_uint8 * stream,picoos_uint32 * pos,picoos_int32 * num)116  static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num)
117  {
118      picoos_uint32 val;
119      picoos_uint32 b;
120  
121      val = 0;
122      b = (picoos_uint32)stream[*pos];
123      (*pos)++;
124      while (b < 128) {
125          val = (val << 7) + b;
126          b = (picoos_uint32)stream[*pos];
127          (*pos)++;
128      }
129      val = (val << 7) + (b - 128);
130      if (val % 2 == 1) {
131          /* negative number */
132          (*num) = -((picoos_int32)((val - 1) / 2)) - 1;
133      } else {
134          /* positive number */
135          (*num) = val / 2;
136      }
137  }
138  
139  
140  /* ************************************************************/
141  /* setting up FST from byte stream */
142  /* ************************************************************/
143  
kfstInitialize(register picoknow_KnowledgeBase this,picoos_Common common)144  static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this,
145          picoos_Common common)
146  {
147      picoos_uint32 curpos;
148      picoos_int32 offs;
149      kfst_subobj_t * kfst;
150  
151      PICODBG_DEBUG(("kfstInitialize -- start\n"));
152  
153      if (NULL == this || NULL == this->subObj) {
154          return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL,
155                  NULL);
156      }
157      kfst = (kfst_subobj_t *) this->subObj;
158  
159      /* +CT+ */
160      kfst->fstStream = this->base;
161      PICODBG_TRACE(("base: %d\n",this->base));
162      kfst->hdrLen = FileHdrSize;
163      curpos = kfst->hdrLen;
164      BytesToNum(kfst->fstStream,& curpos,& kfst->transductionMode);
165      BytesToNum(kfst->fstStream,& curpos,& kfst->nrClasses);
166      BytesToNum(kfst->fstStream,& curpos,& kfst->nrStates);
167      BytesToNum(kfst->fstStream,& curpos,& kfst->termClass);
168      BytesToNum(kfst->fstStream,& curpos,& kfst->alphaHashTabSize);
169      BytesToNum(kfst->fstStream,& curpos,& offs);
170      kfst->alphaHashTabPos = kfst->hdrLen + offs;
171      BytesToNum(kfst->fstStream,& curpos,& kfst->transTabEntrySize);
172      BytesToNum(kfst->fstStream,& curpos,& offs);
173      kfst->transTabPos = kfst->hdrLen + offs;
174      BytesToNum(kfst->fstStream,& curpos,& offs);
175      kfst->inEpsStateTabPos = kfst->hdrLen + offs;
176      BytesToNum(kfst->fstStream,& curpos,& offs);
177      kfst->accStateTabPos = kfst->hdrLen + offs;
178      /* -CT- */
179  
180      return PICO_OK;
181  }
182  
183  
kfstSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)184  static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this,
185          picoos_MemoryManager mm)
186  {
187      if (NULL != this) {
188          picoos_deallocate(mm, (void *) &this->subObj);
189      }
190      return PICO_OK;
191  }
192  
193  
194  /* calculates a small number of data (e.g. addresses) from kb for fast access.
195   * This data is encapsulated in a picokfst_FST that can later be retrieved
196   * with picokfst_getFST. */
picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common)197  pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
198                                                    picoos_Common common)
199  {
200      pico_status_t status;
201  
202      if (NULL == this) {
203          return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL);
204      }
205      if (0 < this->size) {
206          /* not a dummy kb */
207          this->subDeallocate = kfstSubObjDeallocate;
208  
209          this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t));
210  
211          if (NULL == this->subObj) {
212              return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
213          }
214          status = kfstInitialize(this, common);
215          if (PICO_OK != status) {
216              picoos_deallocate(common->mm,(void **)&this->subObj);
217          }
218      }
219      return PICO_OK;
220  }
221  
222  
223  /* ************************************************************/
224  /* FST type and getFST function */
225  /* ************************************************************/
226  
227  
228  
229  /* return kb FST for usage in PU */
picokfst_getFST(picoknow_KnowledgeBase this)230  picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this)
231  {
232      if (NULL == this) {
233          return NULL;
234      } else {
235          return (picokfst_FST) this->subObj;
236      }
237  }
238  
239  
240  
241  /* ************************************************************/
242  /* FST access methods */
243  /* ************************************************************/
244  
245  
246  /* see description in header file */
picokfst_kfstGetTransductionMode(picokfst_FST this)247  extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this)
248  {
249      kfst_SubObj fst = (kfst_SubObj) this;
250      if (fst != NULL) {
251          return fst->transductionMode;
252      } else {
253          return 0;
254      }
255  }
256  
257  
258  /* see description in header file */
picokfst_kfstGetFSTSizes(picokfst_FST this,picoos_int32 * nrStates,picoos_int32 * nrClasses)259  extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses)
260  {
261      kfst_SubObj fst = (kfst_SubObj) this;
262      if (fst != NULL) {
263          *nrStates = fst->nrStates;
264          *nrClasses = fst->nrClasses;
265      } else {
266          *nrStates = 0;
267          *nrClasses = 0;
268      }
269  }
270  
271  /* see description in header file */
picokfst_kfstStartPairSearch(picokfst_FST this,picokfst_symid_t inSym,picoos_bool * inSymFound,picoos_int32 * searchState)272  extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
273                                            picoos_bool * inSymFound, picoos_int32 * searchState)
274  {
275      picoos_uint32 pos;
276      picoos_int32 offs;
277      picoos_int32 h;
278      picoos_int32 inSymCellPos;
279      picoos_int32 inSymX;
280      picoos_int32 nextSameHashInSymOffs;
281  
282      kfst_SubObj fst = (kfst_SubObj) this;
283      (*searchState) =  -1;
284      (*inSymFound) = 0;
285      h = inSym % fst->alphaHashTabSize;
286      pos = fst->alphaHashTabPos + (h * 4);
287      FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
288      if (offs > 0) {
289          inSymCellPos = fst->alphaHashTabPos + offs;
290          pos = inSymCellPos;
291          BytesToNum(fst->fstStream,& pos,& inSymX);
292          BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
293          while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) {
294              inSymCellPos = inSymCellPos + nextSameHashInSymOffs;
295              pos = inSymCellPos;
296              BytesToNum(fst->fstStream,& pos,& inSymX);
297              BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs);
298          }
299          if (inSymX == inSym) {
300              /* input symbol found; state is set to position after symbol cell */
301              (*searchState) = pos;
302              (*inSymFound) = 1;
303          }
304      }
305  }
306  
307  
308  /* see description in header file */
picokfst_kfstGetNextPair(picokfst_FST this,picoos_int32 * searchState,picoos_bool * pairFound,picokfst_symid_t * outSym,picokfst_class_t * pairClass)309  extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
310                                        picoos_bool * pairFound,
311                                        picokfst_symid_t * outSym, picokfst_class_t * pairClass)
312  {
313      picoos_uint32 pos;
314      picoos_int32 val;
315  
316      kfst_SubObj fst = (kfst_SubObj) this;
317      if ((*searchState) < 0) {
318          (*pairFound) = 0;
319          (*outSym) = PICOKFST_SYMID_ILLEG;
320          (*pairClass) =  -1;
321      } else {
322          pos = (*searchState);
323          BytesToNum(fst->fstStream,& pos,& val);
324          *outSym = (picokfst_symid_t)val;
325          if ((*outSym) != PICOKFST_SYMID_ILLEG) {
326              BytesToNum(fst->fstStream,& pos,& val);
327              *pairClass = (picokfst_class_t)val;
328              (*pairFound) = 1;
329              (*searchState) = pos;
330          } else {
331              (*pairFound) = 0;
332              (*outSym) = PICOKFST_SYMID_ILLEG;
333              (*pairClass) =  -1;
334              (*searchState) =  -1;
335          }
336      }
337  }
338  
339  
340  
341  /* see description in header file */
picokfst_kfstGetTrans(picokfst_FST this,picokfst_state_t startState,picokfst_class_t transClass,picokfst_state_t * endState)342  extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
343                                     picokfst_state_t * endState)
344  {
345  
346      picoos_uint32 pos;
347      picoos_int32 index;
348      picoos_uint32 endStateX;
349  
350      kfst_SubObj fst = (kfst_SubObj) this;
351      if ((startState < 1) || (startState > fst->nrStates) || (transClass < 1) || (transClass > fst->nrClasses)) {
352          (*endState) = 0;
353      } else {
354          index = (startState - 1) * fst->nrClasses + transClass - 1;
355          pos = fst->transTabPos + (index * fst->transTabEntrySize);
356          FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX);
357          (*endState) = endStateX;
358      }
359  }
360  
361  
362  /* see description in header file */
picokfst_kfstStartInEpsTransSearch(picokfst_FST this,picokfst_state_t startState,picoos_bool * inEpsTransFound,picoos_int32 * searchState)363  extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
364                                                  picoos_bool * inEpsTransFound, picoos_int32 * searchState)
365  {
366  
367      picoos_int32 offs;
368      picoos_uint32 pos;
369  
370      kfst_SubObj fst = (kfst_SubObj) this;
371      (*searchState) =  -1;
372      (*inEpsTransFound) = 0;
373      if ((startState > 0) && (startState <= fst->nrStates)) {
374          pos = fst->inEpsStateTabPos + (startState - 1) * 4;
375          FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs);
376          if (offs > 0) {
377              (*searchState) = fst->inEpsStateTabPos + offs;
378              (*inEpsTransFound) = 1;
379          }
380      }
381  }
382  
383  
384  
385  /* see description in header file */
picokfst_kfstGetNextInEpsTrans(picokfst_FST this,picoos_int32 * searchState,picoos_bool * inEpsTransFound,picokfst_symid_t * outSym,picokfst_state_t * endState)386  extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
387                                              picoos_bool * inEpsTransFound,
388                                              picokfst_symid_t * outSym, picokfst_state_t * endState)
389  {
390      picoos_uint32 pos;
391      picoos_int32 val;
392  
393      kfst_SubObj fst = (kfst_SubObj) this;
394      if ((*searchState) < 0) {
395          (*inEpsTransFound) = 0;
396          (*outSym) = PICOKFST_SYMID_ILLEG;
397          (*endState) = 0;
398      } else {
399          pos = (*searchState);
400          BytesToNum(fst->fstStream,& pos,& val);
401          *outSym = (picokfst_symid_t)val;
402          if ((*outSym) != PICOKFST_SYMID_ILLEG) {
403              BytesToNum(fst->fstStream,& pos,& val);
404              *endState = (picokfst_state_t)val;
405              (*inEpsTransFound) = 1;
406              (*searchState) = pos;
407          } else {
408              (*inEpsTransFound) = 0;
409              (*outSym) = PICOKFST_SYMID_ILLEG;
410              (*endState) = 0;
411              (*searchState) =  -1;
412          }
413      }
414  }
415  
416  
417  /* see description in header file */
picokfst_kfstIsAcceptingState(picokfst_FST this,picokfst_state_t state)418  extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state)
419  {
420  
421      picoos_uint32 pos;
422      picoos_uint32 val;
423  
424      kfst_SubObj fst = (kfst_SubObj) this;
425      if ((state > 0) && (state <= fst->nrStates)) {
426          pos = fst->accStateTabPos + (state - 1);
427          FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val);
428          return (val == 1);
429      } else {
430          return 0;
431      }
432  }
433  
434  #ifdef __cplusplus
435  }
436  #endif
437  
438  /* End picofst.c */
439