1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picotrns.c
18 *
19 * fst processing
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29 #include "picoos.h"
30 #include "picodbg.h"
31 /* #include "picodata.h" */
32 /* #include "picoknow.h" */
33 #include "picoktab.h"
34 #include "picokfst.h"
35 #include "picotrns.h"
36
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 #if 0
41 }
42 #endif
43
44
45
/**
 * Splits a plane-encoded symbol into its plane number and its symbol byte.
 *
 * Negative input carries no plane information: it is reported in plane 0 and
 * simply truncated to 8 bits. For all other values the plane is taken from
 * the bits above the low byte and the symbol from the low byte.
 *
 * @param symIn  plane-encoded symbol
 * @param plane  receives the plane number
 * @return the symbol id within its plane
 */
picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane) {
    picoos_uint8 lowByte;

    if (symIn >= 0) {
        lowByte = (picoos_uint8) (symIn & 0xFF);
        (*plane) = symIn >> 8;
    } else {
        lowByte = (picoos_uint8) symIn;
        (*plane) = 0;
    }
    return lowByte;
}
55
56 #if defined(PICO_DEBUG)
57
PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg,picoos_int16 insym,picoos_uint8 phonemic)58 void PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg, picoos_int16 insym, picoos_uint8 phonemic)
59 {
60 #include "picokdbg.h"
61 picoos_int16 sym;
62 picoos_uint8 plane;
63 picokdbg_Dbg dbg = (NULL == kbdbg) ? NULL : picokdbg_getDbg(kbdbg);
64 sym = picotrns_unplane(insym, &plane);
65 switch (plane) {
66 case PICOKFST_PLANE_PHONEMES: /* phones */
67 if ((NULL == dbg) || !phonemic) {
68 PICODBG_INFO_MSG((" %c", sym));
69 } else {
70 PICODBG_INFO_MSG((" %s", picokdbg_getPhoneSym(dbg, (picoos_uint8) sym)));
71 }
72 break;
73 case PICOKFST_PLANE_ACCENTS: /* accents */
74 PICODBG_INFO_MSG((" {A%c}", sym));
75 break;
76 case PICOKFST_PLANE_XSAMPA: /* xsampa symbols */
77 PICODBG_INFO_MSG((" {XS:(%i)}", sym));
78 break;
79 case PICOKFST_PLANE_POS: /* part of speech */
80 PICODBG_INFO_MSG((" {P:%d}", sym));
81 break;
82 case PICOKFST_PLANE_PB_STRENGTHS: /* phrases */
83 if (sym == 48) {
84 PICODBG_INFO_MSG((" {WB}", sym));
85 } else if (sym == 115) {
86 PICODBG_INFO_MSG((" {P0}", sym));
87 } else {
88 PICODBG_INFO_MSG((" {P%c}", sym));
89 }
90 break;
91 case PICOKFST_PLANE_INTERN: /* intern */
92 PICODBG_INFO_MSG((" [%c]", sym));
93 break;
94 }
95 }
96
/**
 * Debug helper: prints one symbol with phonemic output requested.
 * Shorthand for PICOTRNS_PRINTSYM1 with 'phonemic' set to 1.
 */
void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym)
{
    const picoos_uint8 phonemic = 1;

    PICOTRNS_PRINTSYM1(kbdbg, insym, phonemic);
}
101
/**
 * Debug helper: prints the symbols of a pos/sym sequence in order.
 *
 * @param kbdbg     debug knowledge base, may be NULL
 * @param seq       sequence to print
 * @param seqLen    number of elements in 'seq'
 * @param phonemic  forwarded unchanged to PICOTRNS_PRINTSYM1 for each element
 */
void PICOTRNS_PRINTSYMSEQ1(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen,
                           picoos_uint8 phonemic) {
    const picotrns_possym_t * cur = seq;
    picoos_uint16 remaining = seqLen;

    while (remaining > 0) {
        PICOTRNS_PRINTSYM1(kbdbg, cur->sym, phonemic);
        cur++;
        remaining--;
    }
}
109
/**
 * Debug helper: prints a pos/sym sequence with phonemic output requested.
 * Shorthand for PICOTRNS_PRINTSYMSEQ1 with 'phonemic' set to 1.
 */
void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen) {
    const picoos_uint8 phonemic = 1;

    PICOTRNS_PRINTSYMSEQ1(kbdbg, seq, seqLen, phonemic);
}
113
picotrns_printSolution(const picotrns_possym_t outSeq[],const picoos_uint16 outSeqLen)114 void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
115 {
116 PICODBG_INFO_CTX();
117 PICODBG_INFO_MSG(("solution: "));
118 PICOTRNS_PRINTSYMSEQ(NULL, outSeq, outSeqLen);
119 PICODBG_INFO_MSG(("\n"));
120 }
121
picotrns_printSolutionAscii(const picotrns_possym_t outSeq[],const picoos_uint16 outSeqLen)122 void picotrns_printSolutionAscii(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen)
123 {
124 PICODBG_INFO_CTX();
125 PICODBG_INFO_MSG(("solution: "));
126 PICOTRNS_PRINTSYMSEQ1(NULL, outSeq, outSeqLen,0);
127 PICODBG_INFO_MSG(("\n"));
128 }
129
130 #endif
131
132
133
134
135 /* * +CT+ ***/
/* State carried from one call of 'TransductionStep' to the next. */
struct picotrns_transductionState {
    picoos_uint16 phase;          /* transduction phase:
                                     0 = before start
                                     1 = before regular recursion step
                                     2 = before finish
                                     3 = after finish */
    picoos_uint32 nrSol;          /* nr of solutions so far */
    picoos_int16  recPos;         /* recursion position; must be signed!
                                     (backtracking below the first position
                                     yields a negative value, which ends the search) */
};
145
/* Search state of one recursion position of the backtracking transduction;
   the workspace passed to the transduction is an array of these cells. */
typedef struct picotrns_altDesc {
    picokfst_state_t startFSTState;   /**< starting FST state in current recursion position */
    picoos_int32     inPos;           /**< corresponding position in input string */
    picokfst_state_t altState;        /**< state of alternatives search;
                                         - 0 = before pair search
                                         - 1 = search state is a valid pair search state
                                         - 2 = before inEps search
                                         - 3 = search state is a valid inEps trans search state
                                         - 4 = no more alternatives */
    picoos_int32     searchState;     /**< pair search state or inEps trans search state */
    picokfst_symid_t altOutSym;       /**< current output symbol at this recursion position */
    picoos_int32     altOutRefPos;    /**< output reference position at this recursion position */
} picotrns_altDesc_t;
159
160
picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm,picoos_uint32 maxByteSize,picoos_uint16 * numAltDescs)161 picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs)
162 {
163 picotrns_AltDesc buf;
164 (*numAltDescs) = (picoos_uint32) (maxByteSize / sizeof(picotrns_altDesc_t));
165 buf = (picotrns_AltDesc) picoos_allocate(mm, (*numAltDescs) * sizeof(picotrns_altDesc_t));
166 if (NULL == buf) {
167 (*numAltDescs) = 0;
168 return NULL;
169 } else {
170 return buf;
171 }
172 }
173
/**
 * Releases a workspace obtained from picotrns_allocate_alt_desc_buf.
 *
 * @param mm          memory manager the buffer was allocated from
 * @param altDescBuf  address of the buffer handle; the address itself is
 *                    handed to picoos_deallocate
 */
void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf)
{
    void * handleAdr = (void *) altDescBuf;

    picoos_deallocate(mm, handleAdr);
}
178
179 /* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */
picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)180 pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
181 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
182 {
183 picoos_uint16 i, j = 0;
184
185 for (i=0; i < inSeqLen; i++) {
186 /* it is assumed that PICOKFST_SYMID_EPS is a hardwired value and not shifted */
187 if (PICOKFST_SYMID_EPS != inSeq[i].sym) {
188 if (j < maxOutSeqLen) {
189 outSeq[j].pos = inSeq[i].pos;
190 outSeq[j].sym = inSeq[i].sym;
191 j++;
192 }
193 }
194 *outSeqLen = j;
195 }
196 return PICO_OK;
197 }
198
199
/* Stores 'sym' at index 'pos' of 'inSeq' and marks the element as inserted
 * (reference position PICOTRNS_POS_INSERT). */
static void insertSym(picotrns_possym_t inSeq[], picoos_uint16 pos, picoos_int16 sym) {
    picotrns_possym_t * slot = &inSeq[pos];

    slot->pos = PICOTRNS_POS_INSERT;
    slot->sym = sym;
}
204
205 /* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way.
206 * inSeq is assumed to be at most PICOTRNS_MAX_NUM_POSSYM, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM */
picotrns_trivial_syllabify(picoktab_Phones phones,const picotrns_possym_t inSeq[],const picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)207 pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones,
208 const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen,
209 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
210 {
211 picoos_uint16 i = 0, j = 0, out = 0, numInserted = 0;
212 picoos_uint8 vowelFound = FALSE;
213 picoos_uint16 accentpos = 0;
214 picoos_int16 accent = 0;
215
216 PICODBG_TRACE(("start"));
217
218
219 while (i < inSeqLen) {
220 /* make sure that at least one more sylSep can be inserted */
221 if (inSeqLen+numInserted+1 >= maxOutSeqLen) {
222 return PICO_EXC_BUF_OVERFLOW;
223 }
224 /* let j skip consonant cluster */
225 accent = 0;
226 accentpos = 0;
227 while ((j < inSeqLen) && !picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[j].sym)) {
228 if ((inSeq[j].sym == picoktab_getPrimstressID(phones))
229 || (inSeq[j].sym == picoktab_getPrimstressID(phones))) {
230 PICODBG_TRACE(("j skipping stress symbol inSeq[%i].sym = %c", j, inSeq[j].sym));
231 accent = inSeq[j].sym;
232 accentpos = j;
233 } else {
234 PICODBG_TRACE(("j skipping consonant inSeq[%i].sym = %c", j, inSeq[j].sym));
235 }
236 j++;
237 }
238 if (j < inSeqLen) { /* j is at the start of a new vowel */
239 /* copy consonant cluster (moving i) to output, insert syll separator if between vowels */
240 while (i < j-1) {
241 if ((accent > 0) && (i == accentpos)) {
242 PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
243 i++;
244 } else {
245 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
246 outSeq[out++] = inSeq[i++];
247 }
248 }
249 if (vowelFound) { /* we're between vowels */
250 PICODBG_TRACE(("inserting syllable separator into output buffer"));
251 insertSym(outSeq,out++,picoktab_getSyllboundID(phones));
252 if (accent > 0) {
253 insertSym(outSeq,out++,accent);
254 }
255 numInserted++;
256 }
257 if ((accent > 0) && (i == accentpos)) {
258 PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym));
259 i++;
260 } else {
261 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym));
262 outSeq[out++] = inSeq[i++];
263 }
264 vowelFound = TRUE;
265 /* now copy vowel cluster */
266 while ((i < inSeqLen) && picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[i].sym)) {
267 PICODBG_TRACE(("copying inSeq[%i].sym = %c (vowel) into output buffer", i, inSeq[i].sym));
268 outSeq[out++] = inSeq[i++];
269 }
270 j = i;
271 } else { /* j is at end of word or end of input */
272 while (i < j) {
273 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant or stress) into output buffer", i, inSeq[i].sym));
274 outSeq[out++] = inSeq[i++];
275 }
276 }
277 *outSeqLen = out;
278 }
279 PICODBG_ASSERT((out == inSeqLen + numInserted));
280
281 return PICO_OK;
282 }
283
284
285 /* ******** +CT+: full transduction procedure **********/
286
287
/* Gets next acceptable alternative for output symbol '*outSym' at current recursion position
   starting from previous alternative in 'altDesc'; possibly uses input symbol
   given by 'inSeq'/'inSeqLen'; returns whether alternative was found in '*found';
   if '*found', the other output values ('*outRefPos', '*endFSTState', '*nextInPos')
   return the characteristics for next recursion step;
   if '*found' is false, the output values are undefined.

   The search is a small state machine driven by 'altDesc->altState'; it first
   enumerates the pairs for the current input symbol (states 0/1) and then the
   input-epsilon transitions of the start state (states 2/3). The state is kept
   in 'altDesc', so a later call continues where the previous one stopped. */

static void GetNextAlternative (picokfst_FST fst, picotrns_AltDesc altDesc,
                                const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
                                picokfst_symid_t * outSym, picoos_int32 * outRefPos,
                                picokfst_state_t * endFSTState, picoos_int32 * nextInPos, picoos_bool * found)
{

    picoos_bool inSymFound;
    picoos_bool pairFound;
    picokfst_class_t pairClass;
    picoos_bool inEpsTransFound;
    picokfst_symid_t inSym;

    (*found) = 0;
    do {
        switch (altDesc->altState) {
            case 0: /* before pair search */
                if (altDesc->inPos < inSeqLen) {
                    inSym = inSeq[altDesc->inPos].sym;
                    if (inSym == PICOKFST_SYMID_EPS) {
                        /* very special case: input epsilon simply produces eps in output
                           without fst state change */
                        (*found) = 1;
                        (*outSym) = PICOKFST_SYMID_EPS;
                        (*outRefPos) = inSeq[altDesc->inPos].pos;
                        (*endFSTState) = altDesc->startFSTState;
                        (*nextInPos) = altDesc->inPos + 1;
                        /* the next call for this position continues with the inEps search */
                        altDesc->altState = 2;
                    } else {
                        /* start search for alternatives using input symbol */
                        picokfst_kfstStartPairSearch(fst,inSeq[altDesc->inPos].sym,& inSymFound,& altDesc->searchState);
                        if (!inSymFound) {
                            altDesc->altState = 2;
                            PICODBG_INFO_CTX();
                            PICODBG_INFO_MSG((" didnt find symbol "));
                            PICOTRNS_PRINTSYM(NULL, inSeq[altDesc->inPos].sym);
                            PICODBG_INFO_MSG(("\n"));

                        } else {
                            altDesc->altState = 1;
                        }
                    }
                } else {
                    /* input exhausted: only input-epsilon transitions remain possible */
                    altDesc->altState = 2;
                }
                break;
            case 1: /* within pair search */
                picokfst_kfstGetNextPair(fst,& altDesc->searchState,& pairFound,& (*outSym),& pairClass);
                if (pairFound) {
                    picokfst_kfstGetTrans(fst,altDesc->startFSTState,pairClass,& (*endFSTState));
                    /* only a positive end state counts as an alternative; otherwise
                       the loop stays in this state and tries the next pair */
                    if ((*endFSTState) > 0) {
                        (*found) = 1;
                        (*outRefPos) = inSeq[altDesc->inPos].pos;
                        (*nextInPos) = altDesc->inPos + 1;
                    }
                } else {
                    /* no more pair found */
                    altDesc->altState = 2;
                }
                break;
            case 2: /* before inEps trans search */
                picokfst_kfstStartInEpsTransSearch(fst,altDesc->startFSTState,& inEpsTransFound,& altDesc->searchState);
                if (inEpsTransFound) {
                    altDesc->altState = 3;
                } else {
                    altDesc->altState = 4;
                }
                break;
            case 3: /* within inEps trans search */
                picokfst_kfstGetNextInEpsTrans(fst,& altDesc->searchState,& inEpsTransFound,& (*outSym),& (*endFSTState));
                if (inEpsTransFound) {
                    /* output symbol is inserted; no input symbol is consumed */
                    (*found) = 1;
                    (*outRefPos) = PICOTRNS_POS_INSERT;
                    (*nextInPos) = altDesc->inPos;
                } else {
                    altDesc->altState = 4;
                }
                break;
            case 4: /* no more alternatives */
                break;
        }
    } while (! ((*found) || (altDesc->altState == 4)) ); /* i.e., until (*found) || (altState == 4) */
}
377
378
379
380 /* Transfers current alternatives path stored in 'altDesc' with current path length 'pathLen'
381 into 'outSeq'/'outSeqLen'. The number of solutions is incremented. */
382
NoteSolution(picoos_uint32 * nrSol,picotrns_printSolutionFct printSolution,picotrns_altDesc_t altDesc[],picoos_uint16 pathLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen)383 static void NoteSolution (picoos_uint32 * nrSol, picotrns_printSolutionFct printSolution,
384 picotrns_altDesc_t altDesc[], picoos_uint16 pathLen,
385 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen)
386 {
387 register picotrns_AltDesc ap;
388 picoos_uint32 i;
389
390 (*nrSol)++;
391 (*outSeqLen) = 0;
392 for (i = 0; i < pathLen; i++) {
393 if (i < maxOutSeqLen) {
394 ap = &altDesc[i];
395 outSeq[i].sym = ap->altOutSym;
396 outSeq[i].pos = ap->altOutRefPos;
397 (*outSeqLen)++;
398 }
399 }
400 if (pathLen > maxOutSeqLen) {
401 PICODBG_WARN(("**** output symbol array too small to hold full solution\n"));
402 }
403 if (printSolution != NULL) {
404 printSolution(outSeq,(*outSeqLen));
405 }
406 }
407
408
409
410 /* *
411 general scheme to get all solutions ("position" refers to abstract backtracking recursion depth,
412 which in the current solution is equal to the output symbol position):
413
414 "set position to first position";
415 "initialize alternatives in first position";
416 REPEAT
417 IF "current state in current position is a solution" THEN
418 "note solution";
419 END;
420 "get first or next acceptable alternative in current position";
421 IF "acceptable alternative found" THEN
422 "note alternative";
423 "go to next position";
424 "initialize alternatives in that position";
425 ELSE
426 "step back to previous position";
427 END;
428 UNTIL "current position is before first position"
429 ***/
430
431
432 /* Initializes transduction state for further use in repeated application
433 of 'TransductionStep'. */
434
StartTransduction(struct picotrns_transductionState * transductionState)435 static void StartTransduction (struct picotrns_transductionState * transductionState)
436 {
437 (*transductionState).phase = 0;
438 }
439
440
441
/* Performs one step in the transduction of 'inSeqLen' input symbols with corresponding
   reference positions in 'inSeq'. '*transductionState' must have been
   initialized by 'StartTransduction'. Repeat calls to this procedure until '*finished' returns true.
   The output is returned in 'outSeqLen' symbols and reference positions in 'outSeq'.
   The output reference positions refer to the corresponding input reference positions.
   Inserted output symbols receive the reference position PICOTRNS_POS_INSERT.
   If several solutions are possible, only the last found solution is returned.
   If no solution is found at all, the input is copied to the output (truncated to
   'maxOutSeqLen' elements).
   'altDesc' is a temporary workspace of 'maxAltDescLen' cells which should be at least
   one cell longer than 'outSeq'.
   'firstSolOnly' determines whether only the first solution should be found or if
   the search should go on to find all solutions (mainly for testing purposes).
   'printSolution', if not NULL, is called for every solution found.

   NOTE: current version written for use in single repetitive steps;
   could be simplified if full transduction can be done as an atomic operation */

static void TransductionStep (picokfst_FST fst, struct picotrns_transductionState * transductionState,
                              picotrns_altDesc_t altDesc[], picoos_uint16 maxAltDescLen,
                              picoos_bool firstSolOnly, picotrns_printSolutionFct printSolution,
                              const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
                              picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
                              picoos_bool * finished)
{
    register picotrns_AltDesc ap;
    picoos_int32 i;
    picokfst_state_t endFSTState;
    picoos_int32 nextInPos;
    picoos_bool found;
    picokfst_symid_t outSym;
    picoos_int32 outRefPos;
    picoos_int32 tmpRecPos;

    (*finished) = 0;
    /* work on a local copy of the recursion position; written back at the end */
    tmpRecPos = (*transductionState).recPos;
    switch ((*transductionState).phase) {
        case 0: /* before initialization */
            (*transductionState).nrSol = 0;

            /* check for initial solution (empty strings are always accepted) */
            if (inSeqLen == 0) {
                NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,0,outSeq,outSeqLen,maxOutSeqLen);
            }

            /* initialize first recursion position */
            tmpRecPos = 0;
            ap = & altDesc[0];
            ap->startFSTState = 1;
            ap->inPos = 0;
            ap->altState = 0;
            (*transductionState).phase = 1;
            break;

        case 1: /* before regular recursion step */
            if ((tmpRecPos < 0) || (firstSolOnly && ((*transductionState).nrSol > 0))) {
                /* end reached: backtracked below the first position, or the first
                   solution was found and no further ones are wanted */
                (*transductionState).phase = 2;
            } else {
                /* not finished; do regular step */

                /* get first or next acceptable alternative in current position */
                GetNextAlternative(fst,& altDesc[tmpRecPos],inSeq,inSeqLen,& outSym,& outRefPos,& endFSTState,& nextInPos,& found);
                if (found) {
                    /* note alternative in current position */
                    ap = & altDesc[tmpRecPos];
                    ap->altOutSym = outSym;
                    ap->altOutRefPos = outRefPos;

                    /* check for solution after found alternative:
                       all input consumed and the FST is in an accepting state */
                    if ((nextInPos == inSeqLen) && picokfst_kfstIsAcceptingState(fst,endFSTState)) {
                        NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,tmpRecPos+1,
                                     outSeq,outSeqLen,maxOutSeqLen);
                    }

                    /* go to next position if possible, start search for follower alternative symbols */
                    if (tmpRecPos < maxAltDescLen-1) {
                        /* got to next position */
                        tmpRecPos = tmpRecPos + 1;

                        /* initialize alternatives in new position */
                        ap = & altDesc[tmpRecPos];
                        ap->startFSTState = endFSTState;
                        ap->inPos = nextInPos;
                        ap->altState = 0;

                    } else {
                        /* do not go on due to limited path but still treat alternatives in current position */
                        PICODBG_WARN(("--- transduction path too long; may fail to find solution\n"));
                    }
                } else {  /* no more acceptable alternative found in current position */
                    /* backtrack to previous recursion */
                    tmpRecPos = tmpRecPos - 1;
                }
            }
            break;

        case 2: /* before finish */
            if ((*transductionState).nrSol == 0) {
                PICODBG_WARN(("--- no transduction solution found, using input as output\n"));
                i = 0;
                while ((i < inSeqLen) && (i < maxOutSeqLen)) {
                    outSeq[i].sym = inSeq[i].sym;
                    outSeq[i].pos = inSeq[i].pos;
                    i++;
                }
                (*outSeqLen) = i;
            } else if ((*transductionState).nrSol > 1) {
                PICODBG_WARN(("--- more than one transducer solutions found\n"));
            }
            (*transductionState).phase = 3;
            break;

        case 3: /* after finish */
            (*finished) = 1;
            break;
    }
    (*transductionState).recPos = tmpRecPos;
}
557
558
559
560 /* see description in header */
picotrns_transduce(picokfst_FST fst,picoos_bool firstSolOnly,picotrns_printSolutionFct printSolution,const picotrns_possym_t inSeq[],picoos_uint16 inSeqLen,picotrns_possym_t outSeq[],picoos_uint16 * outSeqLen,picoos_uint16 maxOutSeqLen,picotrns_AltDesc altDescBuf,picoos_uint16 maxAltDescLen,picoos_uint32 * nrSteps)561 pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly,
562 picotrns_printSolutionFct printSolution,
563 const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen,
564 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen,
565 picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen,
566 picoos_uint32 *nrSteps)
567 {
568 struct picotrns_transductionState transductionState;
569 picoos_bool finished;
570
571 #if defined(PICO_DEBUG)
572 {
573 picoos_uint16 i;
574
575 PICODBG_INFO_CTX();
576 PICODBG_INFO_MSG(("got input: "));
577 for (i=0; i<inSeqLen; i++) {
578 PICODBG_INFO_MSG((" %d", inSeq[i].sym));
579 }
580 PICODBG_INFO_MSG((" ("));
581 PICOTRNS_PRINTSYMSEQ(NULL,inSeq,inSeqLen);
582 PICODBG_INFO_MSG((")\n"));
583 }
584 #endif
585 StartTransduction(&transductionState);
586 finished = 0;
587 *nrSteps = 0;
588 while (!finished) {
589 TransductionStep(fst,&transductionState,altDescBuf,maxAltDescLen,firstSolOnly,printSolution,
590 inSeq,inSeqLen,outSeq,outSeqLen,maxOutSeqLen,&finished);
591 (*nrSteps)++;
592 }
593
594 return PICO_OK;
595 }
596
597
/**
 * Data structure for picotrns_SimpleTransducer object.
 *
 * Holds two pos/sym arrays: 'possymBuf' points to the one holding the current
 * contents, 'possymBufTmp' to the one used as intermediate output during a
 * transduction.
 */
typedef struct picotrns_simple_transducer {
    picoos_Common common;
    picotrns_possym_t possymBufA[PICOTRNS_MAX_NUM_POSSYM+1];
    picotrns_possym_t possymBufB[PICOTRNS_MAX_NUM_POSSYM+1];
    picotrns_possym_t * possymBuf; /**< the buffer of the pos/sym pairs */
    picotrns_possym_t * possymBufTmp; /**< scratch buffer receiving the raw transduction output */
    picoos_uint16 possymReadPos, possymWritePos; /* next pos to read from possymBuf, next pos to write to possymBuf */

    /* buffer for internal calculation of transducer */
    picotrns_AltDesc altDescBuf;
    /* the number of AltDesc in the buffer */
    picoos_uint16 maxAltDescLen;
} picotrns_simple_transducer_t;
614
615
picotrns_stInitialize(picotrns_SimpleTransducer transducer)616 pico_status_t picotrns_stInitialize(picotrns_SimpleTransducer transducer)
617 {
618 transducer->possymBuf = transducer->possymBufA;
619 transducer->possymBufTmp = transducer->possymBufB;
620 transducer->possymReadPos = 0;
621 transducer->possymWritePos = 0;
622 return PICO_OK;
623 }
624 /** creates a SimpleTranducer with a working buffer of given size
625 *
626 * @param mm MemoryManager handle
627 * @param common Common handle
628 * @param maxAltDescLen maximal size for working buffer (in bytes)
629 * @return handle to new SimpleTransducer or NULL if error
630 */
picotrns_newSimpleTransducer(picoos_MemoryManager mm,picoos_Common common,picoos_uint16 maxAltDescLen)631 picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm,
632 picoos_Common common,
633 picoos_uint16 maxAltDescLen)
634 {
635 picotrns_SimpleTransducer this;
636 this = picoos_allocate(mm, sizeof(picotrns_simple_transducer_t));
637 if (this == NULL) {
638 picoos_deallocate(mm, (void *)&this);
639 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
640 return NULL;
641 }
642
643 /* allocate working buffer */
644 this->altDescBuf = picotrns_allocate_alt_desc_buf(mm, maxAltDescLen, &this->maxAltDescLen);
645 if (this->altDescBuf == NULL) {
646 picoos_deallocate(mm, (void *)&this);
647 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
648 return NULL;
649 }
650 this->common = common;
651 picotrns_stInitialize(this);
652 return this;
653 }
654 /** disposes a SimpleTransducer
655 *
656 * @param this
657 * @param mm
658 * @return PICO_OK
659 */
picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,picoos_MemoryManager mm)660 pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this,
661 picoos_MemoryManager mm)
662 {
663 if (NULL != (*this)) {
664 picotrns_deallocate_alt_desc_buf(mm,&(*this)->altDescBuf);
665 picoos_deallocate(mm, (void *) this);
666 (*this) = NULL;
667 }
668 return PICO_OK;
669 }
670
671 /** transduces the contents previously inserted via @ref picotrns_newSimpleTransducer and @ref
672 * picotrns_disposeSimpleTransducer.
673 *
674 * @param this
675 * @param fst
676 * @return
677 */
picotrns_stTransduce(picotrns_SimpleTransducer this,picokfst_FST fst)678 pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst)
679 {
680 picoos_uint16 outSeqLen;
681 picoos_uint32 nrSteps;
682 pico_status_t status;
683
684 status = picotrns_transduce(fst,TRUE,NULL,
685 this->possymBuf, this->possymWritePos,
686 this->possymBufTmp,&outSeqLen, PICOTRNS_MAX_NUM_POSSYM,
687 this->altDescBuf,this->maxAltDescLen,&nrSteps);
688 if (PICO_OK != status) {
689 return status;
690 }
691 return picotrns_eliminate_epsilons(this->possymBufTmp,outSeqLen,this->possymBuf,&this->possymWritePos,PICOTRNS_MAX_NUM_POSSYM);
692 }
693
694 /**
695 * Add chars from NULLC-terminated string \c inStr, shifted to plane \c plane, to internal input buffer of
696 * \c transducer.
697 *
698 * @param this is an initialized picotrns_SimpleTransducer
699 * @param inStr NULLC-terminated byte sequence
700 * @param plane
701 * @return PICO_OK, if all bytes fit into buffer, or PICO_EXC_BUF_OVERFLOW otherwise
702 */
picotrns_stAddWithPlane(picotrns_SimpleTransducer this,picoos_char * inStr,picoos_uint8 plane)703 pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane)
704 {
705 while ((*inStr) && (this->possymWritePos < PICOTRNS_MAX_NUM_POSSYM)) {
706 this->possymBuf[this->possymWritePos].pos = PICOTRNS_POS_INSERT;
707 this->possymBuf[this->possymWritePos].sym = (plane << 8) + (*inStr);
708 PICODBG_DEBUG(("inserting pos/sym = %i/'%c' at pos %i",
709 this->possymBuf[this->possymWritePos].pos,
710 this->possymBuf[this->possymWritePos].sym,
711 this->possymWritePos));
712 this->possymWritePos++;
713 inStr++;
714 }
715 if (!(*inStr)) {
716 return PICO_OK;
717 } else {
718 return PICO_EXC_BUF_OVERFLOW;
719 }
720 }
721
picotrns_stGetSymSequence(picotrns_SimpleTransducer this,picoos_uint8 * outputSymIds,picoos_uint32 maxOutputSymIds)722 pico_status_t picotrns_stGetSymSequence(
723 picotrns_SimpleTransducer this,
724 picoos_uint8 * outputSymIds,
725 picoos_uint32 maxOutputSymIds)
726 {
727 picoos_uint8 plane;
728 picoos_uint32 outputCount = 0;
729 while ((this->possymReadPos < this->possymWritePos) && (outputCount < maxOutputSymIds)) {
730 *outputSymIds++ = picotrns_unplane(this->possymBuf[this->possymReadPos++].sym, &plane);
731 outputCount++;
732 }
733 *outputSymIds = NULLC;
734 if (outputCount <= maxOutputSymIds) {
735 return PICO_OK;
736 } else {
737 return PICO_EXC_BUF_OVERFLOW;
738 }
739 }
740
741 #ifdef __cplusplus
742 }
743 #endif
744
745 /* end picotrns.c */
746