1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picosa.c
18 *
19 * sentence analysis - POS disambiguation
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picokdt.h"
33 #include "picoklex.h"
34 #include "picoktab.h"
35 #include "picokfst.h"
36 #include "picotrns.h"
37 #include "picodata.h"
38 #include "picosa.h"
39
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 #if 0
44 }
45 #endif
46
47
48 /* PU saStep states */
49 #define SA_STEPSTATE_COLLECT 0
50 #define SA_STEPSTATE_PROCESS_POSD 10
51 #define SA_STEPSTATE_PROCESS_WPHO 11
52 #define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
53 #define SA_STEPSTATE_PROCESS_TRNS_FST 13
54 #define SA_STEPSTATE_FEED 2
55
56 #define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))
57
58 #define SA_MSGSTR_SIZE 32
59
60 /* subobject : SentAnaUnit
61 * shortcut : sa
62 * context size : one phrase, max. 30 non-PUNC items, for non-processed items
63 * one item if internal input empty
64 */
65
66 /** @addtogroup picosa
67
68 internal buffers:
69
70 - headx: array for extended item heads of fixed size (head plus
71 index for content, plus two fields for boundary strength/type)
72
73 - cbuf1, cbuf2: buffers for item contents (referenced by index in
74 headx). Future: replace these two buffers by a single double-sided
75 buffer (double shrink-grow type)
76
77 0. bottom up filling of items in headx and cbuf1
78
79 1. POS disambiguation (right-to-left, top-to-bottom):
80 - number and sequence of items unchanged
81 - item content can only get smaller (reducing nr of results in WORDINDEX)
82 -> info stays in "headx, cbuf1" and changed in place \n
83 WORDGRAPH(POSes,NA)graph -> WORDGRAPH(POS,NA)graph \n
84 WORDINDEX(POSes,NA)POS1ind1...POSNindN -> WORDINDEX(POS,NA)POS|ind \n
85
86 2. lex-index lookup and G2P (both directions possible, left-to-right done):
87 - number and sequence of items unchanged, item head info and content
88 changes
89 -> headx changed in place; cbuf1 to cbuf2 \n
90 WORDGRAPH(POS,NA)graph -> WORDPHON(POS,NA)phon \n
91 WORDINDEX(POS,NA)POS|ind -> WORDPHON(POS,NA)phon \n
92
93 3. phrasing (right-to-left):
94
95 Previous (before introducing SBEG)\n
96 ----------------------------------
97 1| 2| 3| 4| \n
98 e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH \n
99 e.g. to BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM \n
100 |1 |2 |3 |4 \n
101
102 3-level bound state: to keep track of bound strength from end of
103 previous punc-phrase, then BOUND item output as first item
104 (strength from prev punc-phrase and type from current
105 punc-phrase).
106
107 trailing PUNC item bound states
108 INIT SEND PHR1
109 PUNC(SENTEND, T) B(I,T)>SEND B(S,T)>SEND B(P1,T)>SEND
110 PUNC(SENTEND, Q) B(I,Q)>SEND B(S,Q)>SEND B(P1,Q)>SEND
111 PUNC(SENTEND, E) B(I,E)>SEND B(S,E)>SEND B(P1,E)>SEND
112 PUNC(PHRASEEND, P) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1
113 PUNC(PHRASEEND, FORC) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1
114 PUNC(FLUSH, T) B(I,T).. B(S,T).. B(P1,T)..
115 B(T,NA) B(T,NA) B(T,NA)
116 >INIT >INIT >INIT
117
118 PHR2/3 case:
119 trailing PUNC item bound states
120 INIT SEND PHR1
121 PUNC(SENTEND, T) B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
122 PUNC(SENTEND, Q) B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
123 PUNC(SENTEND, E) B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
124 PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
125 PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
126 PUNC(FLUSH, T) B(I,P)B(P,T).. B(S,T)B(P,T).. B(P1,T)B(P,T)..
127 B(T,NA) B(T,NA) B(T,NA)
128 >INIT >INIT >INIT
129
130 Current
131 --------
132 e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH
133 e.g. to BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
134 |1 |2 |3 |4
135
136 2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
137 almost full), with the trailing PUNCT item included (last item).
138 If the trailing PUNC is a primary phrase separator, the
139 item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
140 be output at the start of the next primary phrase.
141 Otherwise,
142 the item is converted to the corresponding BOUND and output. The bound state is set to SSEP,
143 so that a BOUND of type SBEG is output at the start of the next primary phrase.
144
145 trailing PUNC item bound states
146 SSEP PPHR
147 PUNC(SENTEND, X) B(B,X)>SSEP B(P1,X)>SSEP (X = T | Q | E)
148 PUNC(FLUSH, T) B(B,T)>SSEP* B(P1,T)>SSEP
149 PUNC(PHRASEEND, P) B(B,P)>PPHR B(P1,P)>PPHR
150 PUNC(PHRASEEND, FORC) B(B,P)>PPHR B(P1,P)>PPHR
151
152 * If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
153 all but the first will be treated as an (empty) phrase containing just this item.
154 If this (single) item is a flush, creation of SBEG is suppressed.
155
156
157 - dtphr phrasing tree (rather subphrasing tree it should be called)
158 determines
159 BOUND_PHR2
160 BOUND_PHR3
161 - boundary strengths are determined for every word (except the
162 first one) from right-to-left. The boundary types mark the phrase
163 type of the phrase following the boundary.
164 - number of items actually changed (new BOUND items added): because
165 of fixed size without content, two fields are contained in headx
166 to indicate if a BOUND needs to be added to the LEFT of the item.
167 -> headx further extended with boundary strength and type info to
168 indicate that to the left of the headx ele a BOUND needs to be
169 inserted when outputting.
170
171 4. accentuation:
172 - number of items unchanged, content unchanged, only head info changes
173 -> changed in place in headx
174 */
175
176
177 typedef struct {
178 picodata_itemhead_t head;
179 picoos_uint16 cind;
180 } picosa_headx_t;
181
182
183 typedef struct sa_subobj {
184 picoos_uint8 procState; /* for next processing step decision */
185
186 picoos_uint8 inspaceok; /* flag: headx/cbuf1 has space for an item */
187 picoos_uint8 needsmoreitems; /* flag: need more items */
188 picoos_uint8 phonesTransduced; /* flag: */
189
190 picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. location for an item */
191
192 picosa_headx_t headx[PICOSA_MAXNR_HEADX];
193 picoos_uint16 headxBottom; /* bottom */
194 picoos_uint16 headxLen; /* length, 0 if empty */
195
196 picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
197 picoos_uint16 cbuf1BufSize; /* actually allocated size */
198 picoos_uint16 cbuf1Len; /* length, 0 if empty */
199
200 picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
201 picoos_uint16 cbuf2BufSize; /* actually allocated size */
202 picoos_uint16 cbuf2Len; /* length, 0 if empty */
203
204 picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
205 picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
206 picotrns_possym_t * phonBuf;
207 picotrns_possym_t * phonBufOut;
208 picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
209 picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
210
211
212 /* buffer for internal calculation of transducer */
213 picotrns_AltDesc altDescBuf;
214 /* the number of AltDesc in the buffer */
215 picoos_uint16 maxAltDescLen;
216
217 /* tab knowledge base */
218 picoktab_Graphs tabgraphs;
219 picoktab_Phones tabphones;
220 picoktab_Pos tabpos;
221 picoktab_FixedIds fixedIds;
222
223 /* dtposd knowledge base */
224 picokdt_DtPosD dtposd;
225
226 /* dtg2p knowledge base */
227 picokdt_DtG2P dtg2p;
228
229 /* lex knowledge base */
230 picoklex_Lex lex;
231
232 /* ulex knowledge bases */
233 picoos_uint8 numUlex;
234 picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
235
236 /* fst knowledge bases */
237 picoos_uint8 numFsts;
238 picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
239 picoos_uint8 curFst; /* the fst to be applied next */
240
241
242 } sa_subobj_t;
243
244
saInitialize(register picodata_ProcessingUnit this,picoos_int32 r_mode)245 static pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 r_mode) {
246 sa_subobj_t * sa;
247 picoos_uint16 i;
248 picokfst_FST fst;
249 picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
250 picoklex_Lex ulex;
251 picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
252
253 PICODBG_DEBUG(("calling"));
254
255 if (NULL == this || NULL == this->subObj) {
256 return picoos_emRaiseException(this->common->em,
257 PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
258 }
259 sa = (sa_subobj_t *) this->subObj;
260
261 /* sa->common = this->common; */
262
263 sa->procState = SA_STEPSTATE_COLLECT;
264
265 sa->inspaceok = TRUE;
266 sa->needsmoreitems = TRUE;
267
268 sa->headxBottom = 0;
269 sa->headxLen = 0;
270 sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
271 sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
272 sa->cbuf1Len = 0;
273 sa->cbuf2Len = 0;
274
275 /* init headx, cbuf1, cbuf2 */
276 for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
277 sa->headx[i].head.type = 0;
278 sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
279 sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
280 sa->headx[i].head.len = 0;
281 sa->headx[i].cind = 0;
282 }
283 for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
284 sa->cbuf1[i] = 0;
285 sa->cbuf2[i] = 0;
286 }
287
288
289 /* possym buffer */
290 sa->phonesTransduced = FALSE;
291 sa->phonBuf = sa->phonBufA;
292 sa->phonBufOut = sa->phonBufB;
293 sa->phonReadPos = 0;
294 sa->phonWritePos = 0;
295 sa->nextReadPos = 0;
296
297 if (r_mode == PICO_RESET_SOFT) {
298 /*following initializations needed only at startup or after a full reset*/
299 return PICO_OK;
300 }
301
302 /* kb fst[] */
303 sa->numFsts = 0;
304 for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
305 fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
306 if (NULL != fst) {
307 sa->fst[sa->numFsts++] = fst;
308 }
309 }
310 sa->curFst = 0;
311 PICODBG_DEBUG(("got %i fsts", sa->numFsts));
312 /* kb fixedIds */
313 sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
314
315 /* kb tabgraphs */
316 sa->tabgraphs =
317 picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
318 if (sa->tabgraphs == NULL) {
319 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
320 NULL, NULL);
321 }
322 PICODBG_DEBUG(("got tabgraphs"));
323
324 /* kb tabphones */
325 sa->tabphones =
326 picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
327 if (sa->tabphones == NULL) {
328 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
329 NULL, NULL);
330 }
331 PICODBG_DEBUG(("got tabphones"));
332
333 #ifdef PICO_DEBU
334 {
335 picoos_uint16 itmp;
336 for (itmp = 0; itmp < 256; itmp++) {
337 if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
338 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
339 }
340 if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
341 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
342 }
343 if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
344 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
345 }
346 if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
347 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
348 }
349 if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
350 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
351 }
352 if (picoktab_isPrimstress(sa->tabphones, itmp)) {
353 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
354 }
355 if (picoktab_isSecstress(sa->tabphones, itmp)) {
356 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
357 }
358 if (picoktab_isSyllbound(sa->tabphones, itmp)) {
359 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
360 }
361 if (picoktab_isPause(sa->tabphones, itmp)) {
362 PICODBG_DEBUG(("tabphones isPause: %d", itmp));
363 }
364 }
365
366 PICODBG_DEBUG(("tabphones primstressID: %d",
367 picoktab_getPrimstressID(sa->tabphones)));
368 PICODBG_DEBUG(("tabphones secstressID: %d",
369 picoktab_getSecstressID(sa->tabphones)));
370 PICODBG_DEBUG(("tabphones syllboundID: %d",
371 picoktab_getSyllboundID(sa->tabphones)));
372 PICODBG_DEBUG(("tabphones pauseID: %d",
373 picoktab_getPauseID(sa->tabphones)));
374 }
375 #endif
376
377 /* kb tabpos */
378 sa->tabpos =
379 picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
380 if (sa->tabpos == NULL) {
381 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
382 NULL, NULL);
383 }
384 PICODBG_DEBUG(("got tabpos"));
385
386 /* kb dtposd */
387 sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
388 if (sa->dtposd == NULL) {
389 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
390 NULL, NULL);
391 }
392 PICODBG_DEBUG(("got dtposd"));
393
394 /* kb dtg2p */
395 sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
396 if (sa->dtg2p == NULL) {
397 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
398 NULL, NULL);
399 }
400 PICODBG_DEBUG(("got dtg2p"));
401
402 /* kb lex */
403 sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
404 if (sa->lex == NULL) {
405 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
406 NULL, NULL);
407 }
408 PICODBG_DEBUG(("got lex"));
409
410 /* kb ulex[] */
411 sa->numUlex = 0;
412 for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
413 ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
414 if (NULL != ulex) {
415 sa->ulex[sa->numUlex++] = ulex;
416 }
417 }
418 PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
419
420 return PICO_OK;
421 }
422
423 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
424 picoos_int16 mode,
425 picoos_uint16 *numBytesOutput);
426
saTerminate(register picodata_ProcessingUnit this)427 static pico_status_t saTerminate(register picodata_ProcessingUnit this) {
428 return PICO_OK;
429 }
430
saSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)431 static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
432 picoos_MemoryManager mm) {
433 sa_subobj_t * sa;
434 if (NULL != this) {
435 sa = (sa_subobj_t *) this->subObj;
436 picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
437 picoos_deallocate(mm, (void *) &this->subObj);
438 }
439 return PICO_OK;
440 }
441
442
picosa_newSentAnaUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)443 picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
444 picoos_Common common,
445 picodata_CharBuffer cbIn,
446 picodata_CharBuffer cbOut,
447 picorsrc_Voice voice) {
448 picodata_ProcessingUnit this;
449 sa_subobj_t * sa;
450 this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
451 if (this == NULL) {
452 return NULL;
453 }
454
455 this->initialize = saInitialize;
456 PICODBG_DEBUG(("set this->step to saStep"));
457 this->step = saStep;
458 this->terminate = saTerminate;
459 this->subDeallocate = saSubObjDeallocate;
460
461 this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
462 if (this->subObj == NULL) {
463 picoos_deallocate(mm, (void *)&this);
464 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
465 return NULL;
466 }
467
468 sa = (sa_subobj_t *) this->subObj;
469
470 sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
471 if (NULL == sa->altDescBuf) {
472 picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
473 picoos_deallocate(mm, (void *)&sa);
474 picoos_deallocate(mm, (void *)&this);
475 picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
476 }
477
478
479 saInitialize(this, PICO_RESET_FULL);
480 return this;
481 }
482
483
484 /* ***********************************************************************/
485 /* PROCESS_POSD disambiguation functions */
486 /* ***********************************************************************/
487
488 /* find next POS to the right of 'ind' and return its POS and index */
saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint16 ind,const picoos_uint16 top,picoos_uint16 * rightind)489 static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
490 register sa_subobj_t *sa,
491 const picoos_uint16 ind,
492 const picoos_uint16 top,
493 picoos_uint16 *rightind) {
494 picoos_uint8 val;
495 picoos_int32 i;
496
497 val = PICOKDT_EPSILON;
498 for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
499 if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
500 (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX) ||
501 (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
502 val = sa->headx[i].head.info1;
503 }
504 }
505 *rightind = i - 1;
506 return val;
507 }
508
509
510 /* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
saDisambPos(register picodata_ProcessingUnit this,register sa_subobj_t * sa)511 static pico_status_t saDisambPos(register picodata_ProcessingUnit this,
512 register sa_subobj_t *sa) {
513 picokdt_classify_result_t dtres;
514 picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
515 picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
516 picoos_uint16 prevout; /* direct dt output (hist.) or POS of prev word */
517 picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
518 picoos_uint16 curPOS; /* POS(es) of current word */
519 picoos_int32 first; /* index of first item with POS(es) */
520 picoos_int32 ci;
521 picoos_uint8 okay; /* two uses: processing okay and lexind resovled */
522 picoos_uint8 i;
523 picoos_uint16 inval;
524 picoos_uint16 fallback;
525
526 /* set initial values */
527 okay = TRUE;
528 prevout = PICOKDT_HISTORY_ZERO;
529 curPOS = PICODATA_ITEMINFO1_ERR;
530 first = 0;
531
532 while ((first < sa->headxLen) &&
533 (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
534 (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
535 (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
536 first++;
537 }
538 if (first >= sa->headxLen) {
539 /* phrase not containing an item with POSes info, e.g. single flush */
540 PICODBG_DEBUG(("no item with POSes found"));
541 return PICO_OK;
542 }
543
544 lastprev3 = first;
545
546 for (i = 0; i <= half_nratt_posd; i++) {
547 valbuf[i] = PICOKDT_HISTORY_ZERO;
548 }
549 /* set POS(es) of current word, will be shifted afterwards */
550 valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
551 for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
552 /* find next POS to the right and set valbuf[i] */
553 valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
554 }
555
556 PICODBG_TRACE(("headxLen: %d", sa->headxLen));
557
558 /* process from left to right all items in headx */
559 for (ci = first; ci < sa->headxLen; ci++) {
560 okay = TRUE;
561
562 PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
563
564 /* if not (WORDGRAPH or WORDINDEX) */
565 if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
566 (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX) &&
567 (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
568 continue;
569 }
570
571 PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
572
573 /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
574 /* shift all elements one position to the left */
575 /* shift predicted values (history) */
576 for (i=1; i<half_nratt_posd; i++) {
577 valbuf[i-1] = valbuf[i];
578 }
579 /* insert previously predicted value (now history) */
580 valbuf[half_nratt_posd-1] = prevout;
581 /* shift not yet predicted values */
582 for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
583 valbuf[i-1] = valbuf[i];
584 }
585 /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
586 valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
587
588 /* just to be on the safe side; the following should never happen */
589 if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
590 PICODBG_WARN(("syncing POS"));
591 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
592 NULL, NULL);
593 valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
594 }
595
596 curPOS = valbuf[half_nratt_posd];
597
598 /* Check if POS disambiguation not needed */
599 if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
600 /* not needed */
601 inval = 0;
602 fallback = 0;
603 if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
604 &prevout, &fallback)) {
605 if (fallback) {
606 prevout = fallback;
607
608 } else {
609 PICODBG_ERROR(("problem doing reverse output mapping"));
610 prevout = curPOS;
611 }
612 }
613 PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
614 continue;
615 }
616
617 /* assuming PICOKDT_NRATT_POSD == 7 */
618 PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
619 ci, valbuf[0], valbuf[1], valbuf[2],
620 valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
621
622 /* no continue so far => POS disambiguation needed */
623 /* construct input vector, which is set in dtposd */
624 if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
625 /* error constructing invec */
626 PICODBG_WARN(("problem with invec"));
627 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
628 NULL, NULL);
629 okay = FALSE;
630 }
631 /* classify */
632 if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
633 /* error doing classification */
634 PICODBG_WARN(("problem classifying"));
635 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
636 NULL, NULL);
637 okay = FALSE;
638 }
639 /* decompose */
640 if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
641 /* error decomposing */
642 PICODBG_WARN(("problem decomposing"));
643 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
644 NULL, NULL);
645 okay = FALSE;
646 }
647 if (okay && dtres.set) {
648 PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
649 } else {
650 PICODBG_WARN(("problem disambiguating POS"));
651 dtres.class = PICODATA_ITEMINFO1_ERR;
652 }
653
654 if (dtres.class > 255) {
655 PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
656 dtres.class = PICODATA_ITEMINFO1_ERR;
657 }
658
659 sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
660 if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
661 /* find pos/ind entry in cbuf matching unique,
662 disambiguated POS, adapt current headx cind/len
663 accordingly */
664 PICODBG_DEBUG(("select phon based on POS disambiguation"));
665 okay = FALSE;
666 for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
667 PICODBG_DEBUG(("comparing POS at cind + %d", i));
668 if (picoktab_isPartOfPosGroup(sa->tabpos,
669 (picoos_uint8)dtres.class,
670 sa->cbuf1[sa->headx[ci].cind + i])) {
671 PICODBG_DEBUG(("found match for entry %d",
672 i/PICOKLEX_POSIND_SIZE + 1));
673 sa->headx[ci].cind += i;
674 okay = TRUE;
675 break;
676 }
677 }
678 /* not finding a match is possible if posd predicts a POS that
679 is not part of any of the input POSes -> no warning */
680 #if defined(PICO_DEBUG)
681 if (!okay) {
682 PICODBG_DEBUG(("no match found, selecting 1st entry"));
683 }
684 #endif
685 sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
686 }
687 }
688 return PICO_OK;
689 }
690
691
692 /* ***********************************************************************/
693 /* PROCESS_WPHO functions, copy, lexindex, and g2p */
694 /* ***********************************************************************/
695
696 /* ************** copy ***************/
697
saCopyItemContent1to2(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 ind)698 static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
699 register sa_subobj_t *sa,
700 picoos_uint16 ind) {
701 picoos_uint16 i;
702 picoos_uint16 cind1;
703
704 /* set headx.cind, and copy content, head unchanged */
705 cind1 = sa->headx[ind].cind;
706 sa->headx[ind].cind = sa->cbuf2Len;
707
708 /* check cbufLen */
709 if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
710 sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
711 PICODBG_WARN(("phones skipped"));
712 picoos_emRaiseWarning(this->common->em,
713 PICO_WARN_INCOMPLETE, NULL, NULL);
714 if (sa->headx[ind].head.len == 0) {
715 sa->headx[ind].cind = 0;
716 }
717 }
718
719 for (i = 0; i < sa->headx[ind].head.len; i++) {
720 sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
721 sa->cbuf2Len++;
722 }
723
724 PICODBG_DEBUG(("%c item, len: %d",
725 sa->headx[ind].head.type, sa->headx[ind].head.len));
726
727 return PICO_OK;
728 }
729
730
731 /* ************** lexindex ***************/
732
saLexIndLookup(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoklex_Lex lex,picoos_uint16 ind)733 static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
734 register sa_subobj_t *sa,
735 picoklex_Lex lex,
736 picoos_uint16 ind) {
737 picoos_uint8 pos;
738 picoos_uint8 *phones;
739 picoos_uint8 plen;
740 picoos_uint16 i;
741
742 if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
743 PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
744 sa->headx[ind].cind = sa->cbuf2Len;
745
746 /* check cbufLen */
747 if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
748 plen = sa->cbuf2BufSize - sa->cbuf2Len;
749 PICODBG_WARN(("phones skipped"));
750 picoos_emRaiseWarning(this->common->em,
751 PICO_WARN_INCOMPLETE, NULL, NULL);
752 if (plen == 0) {
753 sa->headx[ind].cind = 0;
754 }
755 }
756
757 /* set item head, info1, info2 unchanged */
758 sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
759 sa->headx[ind].head.len = plen;
760
761 for (i = 0; i < plen; i++) {
762 sa->cbuf2[sa->cbuf2Len] = phones[i];
763 sa->cbuf2Len++;
764 }
765
766 PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
767 PICODATA_ITEM_WORDPHON, pos, plen));
768
769 } else {
770 PICODBG_WARN(("lexIndLookup problem"));
771 picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
772 NULL, NULL);
773 }
774 return PICO_OK;
775 }
776
777
778
779 /* ************** g2p ***************/
780
781
782 /* Name : saGetNvowel
783 Function: returns vowel info in a word or word seq
784 Input : sInChar the grapheme string to be converted in phoneme
785 inLen number of bytes in grapheme buffer
786 inPos start position of current grapheme (0..inLen-1)
787 Output : nVow number of vowels in the word
788 nVord vowel order in the word
789 Returns : TRUE: processing successful; FALSE: errors
790 */
saGetNrVowel(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint8 * sInChar,const picoos_uint16 inLen,const picoos_uint8 inPos,picoos_uint8 * nVow,picoos_uint8 * nVord)791 static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
792 register sa_subobj_t *sa,
793 const picoos_uint8 *sInChar,
794 const picoos_uint16 inLen,
795 const picoos_uint8 inPos,
796 picoos_uint8 *nVow,
797 picoos_uint8 *nVord) {
798 picoos_uint32 nCount;
799 picoos_uint32 pos;
800 picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
801
802 /*defaults*/
803 *nVow = 0;
804 *nVord = 0;
805 /*1:check wether the current char is a vowel*/
806 pos = inPos;
807 if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
808 !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
809 return FALSE;
810 }
811 /*2:count number of vowels in current word and find vowel order*/
812 for (nCount = 0; nCount < inLen; ) {
813 if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
814 return FALSE;
815 }
816 if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
817 PICOBASE_UTF8_MAXLEN)) {
818 (*nVow)++;
819 if (nCount == pos) {
820 (*nVord) = (*nVow);
821 }
822 }
823 }
824 return TRUE;
825 }
826
827
828 /* do g2p for a full word, right-to-left */
saDoG2P(register picodata_ProcessingUnit this,register sa_subobj_t * sa,const picoos_uint8 * graph,const picoos_uint8 graphlen,const picoos_uint8 pos,picoos_uint8 * phones,const picoos_uint16 phonesmaxlen,picoos_uint16 * plen)829 static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
830 register sa_subobj_t *sa,
831 const picoos_uint8 *graph,
832 const picoos_uint8 graphlen,
833 const picoos_uint8 pos,
834 picoos_uint8 *phones,
835 const picoos_uint16 phonesmaxlen,
836 picoos_uint16 *plen) {
837 picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
838 picoos_uint16 outNp2Ch;
839 picoos_uint16 outNp3Ch;
840 picoos_uint8 nPrimary;
841 picoos_uint8 nCount;
842 picoos_uint32 utfpos;
843 picoos_uint16 nOutVal;
844 picoos_uint8 okay;
845 picoos_uint16 phonesind;
846 picoos_uint8 nrvow;
847 picoos_uint8 ordvow;
848 picokdt_classify_vecresult_t dtresv;
849 picoos_uint16 i;
850
851 *plen = 0;
852 okay = TRUE;
853
854 /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
855 phones which are predicted in reverse order. Once all are
856 available put them in phones in usuable order. phonesind is
857 used to fille item in reverse order starting at the end of
858 tmpbuf. */
859 phonesind = PICOSA_MAXITEMSIZE - 1;
860
861 /* prepare the data for loop operations */
862 outNp1Ch = PICOKDT_HISTORY_ZERO;
863 outNp2Ch = PICOKDT_HISTORY_ZERO;
864 outNp3Ch = PICOKDT_HISTORY_ZERO;
865
866 /* inner loop */
867 nPrimary = 0;
868
869 /* ************************************************/
870 /* go backward grapheme by grapheme, it's utf8... */
871 /* ************************************************/
872
873 /* set start nCount to position of start of last utfchar */
874 /* ! watch out! somethimes starting at 1, sometimes at 0,
875 ! sometimes counting per byte, sometimes per UTF8 char */
876 /* nCount is (start position + 1) of utf8 char */
877 utfpos = graphlen;
878 if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
879 nCount = utfpos + 1;
880 } else {
881 /* should not occurr */
882 PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
883 return FALSE;
884 }
885
886 while (nCount > 0) {
887 PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
888 okay = TRUE;
889
890 if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
891 &ordvow)) {
892 nrvow = 0;
893 ordvow = 0;
894 }
895
896 /* prepare input vector, set inside tree object invec,
897 * g2pBuildVector will call the constructInVec tree method */
898 if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
899 graph, /*grapheme start*/
900 graphlen, /*grapheme length*/
901 nCount-1, /*grapheme current position*/
902 pos, /*Word POS*/
903 nrvow, /*nr vowels if vowel, 0 else */
904 ordvow, /*ord of vowel if vowel, 0 el*/
905 &nPrimary, /*primary stress flag*/
906 outNp1Ch, /*Right phoneme context +1*/
907 outNp2Ch, /*Right phoneme context +2*/
908 outNp3Ch)) { /*Right phon context +3*/
909 /*Errors in preparing the input vector : skip processing*/
910 PICODBG_WARN(("problem with invec"));
911 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
912 NULL, NULL);
913 okay = FALSE;
914 }
915
916 /* classify using the invec in the tree object and save the direct
917 tree output also in the tree object */
918 if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
919 /* error doing classification */
920 PICODBG_WARN(("problem classifying"));
921 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922 NULL, NULL);
923 okay = FALSE;
924 }
925
926 /* decompose the invec in the tree object and return result in dtresv */
927 if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
928 /* error decomposing */
929 PICODBG_WARN(("problem decomposing"));
930 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
931 NULL, NULL);
932 okay = FALSE;
933 }
934
935 if (okay) {
936 if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
937 /* no phones to be added */
938 PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
939 ;
940 } else {
941 /* add decomposed output to tmpbuf, reverse order */
942 for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
943 phonesind)<phonesmaxlen) &&
944 (i > 0)); ) {
945 i--;
946 PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
947 if (dtresv.classvec[i] > 255) {
948 PICODBG_WARN(("dt result outside valid range, "
949 "skipping phone"));
950 continue;
951 }
952 sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
953 if (!nPrimary) {
954 if (picoktab_isPrimstress(sa->tabphones,
955 (picoos_uint8)dtresv.classvec[i])) {
956 nPrimary = 1;
957 }
958 }
959 (*plen)++;
960 }
961 if (i > 0) {
962 PICODBG_WARN(("phones skipped"));
963 picoos_emRaiseWarning(this->common->em,
964 PICO_WARN_INCOMPLETE, NULL, NULL);
965 }
966 }
967 }
968
969 /*shift tree output history and update*/
970 outNp3Ch = outNp2Ch;
971 outNp2Ch = outNp1Ch;
972 outNp1Ch = nOutVal;
973
974 /* go backward one utf8 char */
975 /* nCount is in +1 domain */
976 if (nCount <= 1) {
977 /* end of str */
978 nCount = 0;
979 } else {
980 utfpos = nCount - 1;
981 if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
982 /* should not occur */
983 PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
984 return FALSE;
985 } else {
986 nCount = utfpos + 1;
987 }
988 }
989 }
990
991 /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
992 /* now that we have all phone IDs, copy in correct order to phones */
993 /* phonesind point to next free slot in the reverse domainn,
994 ie. inc first */
995 phonesind++;
996 for (i = 0; i < *plen; i++, phonesind++) {
997 phones[i] = sa->tmpbuf[phonesind];
998 }
999 return TRUE;
1000 }
1001
1002
1003 /* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
1004
saGraphemeToPhoneme(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 ind)1005 static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
1006 register sa_subobj_t *sa,
1007 picoos_uint16 ind) {
1008 picoos_uint16 plen;
1009
1010 PICODBG_TRACE(("starting g2p"));
1011
1012 if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
1013 sa->headx[ind].head.len, sa->headx[ind].head.info1,
1014 &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
1015 &plen)) {
1016
1017 /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
1018 if (plen > 255) {
1019 PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
1020 plen));
1021 plen = 255;
1022 }
1023
1024 /* set item head, info1, info2 unchanged */
1025 sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
1026 sa->headx[ind].head.len = (picoos_uint8)plen;
1027 sa->headx[ind].cind = sa->cbuf2Len;
1028 sa->cbuf2Len += plen;
1029 PICODBG_DEBUG(("%c item, plen: %d",
1030 PICODATA_ITEM_WORDPHON, plen));
1031 } else {
1032 PICODBG_WARN(("problem doing g2p"));
1033 picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
1034 NULL, NULL);
1035 }
1036 return PICO_OK;
1037 }
1038
1039
1040 /* ***********************************************************************/
1041 /* extract phonemes of an item into a phonBuf */
1042 /* ***********************************************************************/
1043
saAddPhoneme(register sa_subobj_t * sa,picoos_uint16 pos,picoos_uint16 sym)1044 static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
1045 /* picoos_uint8 plane, unshifted; */
1046
1047 /* just for debuging */
1048 /*
1049 unshifted = picotrns_unplane(sym,&plane);
1050 PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
1051 */
1052 if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
1053 /* not an error! */
1054 PICODBG_DEBUG(("couldn't add because phon buffer full"));
1055 return PICO_EXC_BUF_OVERFLOW;
1056 } else {
1057 sa->phonBuf[sa->phonWritePos].pos = pos;
1058 sa->phonBuf[sa->phonWritePos].sym = sym;
1059 sa->phonWritePos++;
1060 return PICO_OK;
1061 }
1062 }
1063
1064 /*
1065 static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
1066 return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1067 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1068 }
1069
1070
1071 static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
1072 return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1073 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1074 }
1075
1076 */
1077
saExtractPhonemes(register picodata_ProcessingUnit this,register sa_subobj_t * sa,picoos_uint16 pos,picodata_itemhead_t * head,const picoos_uint8 * content)1078 static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
1079 register sa_subobj_t *sa, picoos_uint16 pos,
1080 picodata_itemhead_t* head, const picoos_uint8* content)
1081 {
1082 pico_status_t rv= PICO_OK;
1083 picoos_uint8 i;
1084 picoos_int16 fstSymbol;
1085 #if defined(PICO_DEBUG)
1086 picoos_char msgstr[SA_MSGSTR_SIZE];
1087 #endif
1088
1089 PICODBG_TRACE(("doing item %s",
1090 picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
1091 /*
1092 Items considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
1093 'pos'.
1094 Elements that go into the transduction receive "their" position in the buffer.
1095 */
1096 sa->phonWritePos = 0;
1097 /* WORDPHON(POS,WACC)phon */
1098 rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1099 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
1100 for (i = 0; i < head->len; i++) {
1101 fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
1102 /* */
1103 PICODBG_TRACE(("adding phoneme %c",fstSymbol));
1104 rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
1105 }
1106 rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
1107 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
1108 sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE + head->len;
1109 return rv;
1110 }
1111
1112
1113 #define SA_POSSYM_OK 0
1114 #define SA_POSSYM_OUT_OF_RANGE 1
1115 #define SA_POSSYM_END 2
1116 #define SA_POSSYM_INVALID -3
1117 /* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
1118 * buf).
1119 * 'rangeEnd' is the first possym position outside the desired range.
1120 * Possible return values:
1121 * SA_POSSYM_OK : 'pos' and 'sym' are set to the read possym, *readPos is advanced
1122 * SA_POSSYM_OUT_OF_RANGE : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
1123 * SA_POSSYM_UNDERFLOW : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined
1124 * SA_POSSYM_INVALID : "strange" pos. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined
1125 */
getNextPosSym(sa_subobj_t * sa,picoos_int16 * pos,picoos_int16 * sym,picoos_int16 rangeEnd)1126 static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
1127 picoos_int16 rangeEnd) {
1128 /* skip POS_IGNORE */
1129 while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos)) {
1130 PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
1131 sa->phonReadPos++;
1132 }
1133 if ((sa->phonReadPos < sa->phonWritePos)) {
1134 *pos = sa->phonBuf[sa->phonReadPos].pos;
1135 if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
1136 *sym = sa->phonBuf[sa->phonReadPos++].sym;
1137 return SA_POSSYM_OK;
1138 } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
1139 return SA_POSSYM_INVALID;
1140 } else {
1141 return SA_POSSYM_OUT_OF_RANGE;
1142 }
1143 } else {
1144 /* no more possyms to read */
1145 *pos = PICOTRNS_POS_INVALID;
1146 return SA_POSSYM_END;
1147 }
1148 }
1149
1150
1151
1152
1153 /* ***********************************************************************/
1154 /* saStep function */
1155 /* ***********************************************************************/
1156
1157 /*
1158 complete phrase processed in one step, if not fast enough -> rework
1159
1160 init, collect into internal buffer, process, and then feed to
1161 output buffer
1162
1163 init state: INIT ext ext
1164 state trans: in hc1 hc2 out
1165
1166 INIT | putItem = 0 0 +1 | BUSY -> COLL (put B-SBEG item,
1167 set do-init to false)
1168
1169 inspace-ok-hc1
1170 needs-more-items-(phrase-or-flush)
1171 COLL1 |getItems -n +n 0 1 | ATOMIC -> PPOSD (got items,
1172 if flush set do-init)
1173 COLL2 |getItems -n +n 1 0 | ATOMIC -> PPOSD (got items, forced)
1174 COLL3 |getItems -n +n 1 1 | IDLE (got items, need more)
1175 COLL4 |getItems = = 1 1 | IDLE (got no items)
1176
1177 PPOSD | posd = ~n~n | BUSY -> PWP (posd done)
1178 PWP | lex/g2p = ~n-n 0+n | BUSY -> PPHR (lex/g2p done)
1179 PPHR | phr = -n 0 +m=n | BUSY -> PACC (phr done, m>=n)
1180 PACC | acc = 0 0 ~m=n | BUSY -> FEED (acc done)
1181
1182 doinit-flag
1183 FEED | putItems 0 0 0 -m-n +m 0 | BUSY -> COLL (put items)
1184 FEED | putItems 0 0 0 -m-n +m 1 | BUSY -> INIT (put items)
1185 FEED | putItems 0 0 0 -d-d +d | OUT_FULL (put some items)
1186 */
1187
saStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1188 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
1189 picoos_int16 mode,
1190 picoos_uint16 *numBytesOutput) {
1191 register sa_subobj_t *sa;
1192 pico_status_t rv = PICO_OK;
1193 pico_status_t rvP = PICO_OK;
1194 picoos_uint16 blen = 0;
1195 picoos_uint16 clen = 0;
1196 picoos_uint16 i;
1197 picoklex_Lex lex;
1198
1199
1200 if (NULL == this || NULL == this->subObj) {
1201 return PICODATA_PU_ERROR;
1202 }
1203 sa = (sa_subobj_t *) this->subObj;
1204 mode = mode; /* avoid warning "var not used in this function"*/
1205 *numBytesOutput = 0;
1206 while (1) { /* exit via return */
1207 PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
1208 sa->procState, sa->headxLen, sa->cbuf1Len,
1209 sa->cbuf2Len));
1210
1211 switch (sa->procState) {
1212
1213 /* *********************************************************/
1214 /* collect state: get item(s) from charBuf and store in
1215 * internal buffers, need a complete punctuation-phrase
1216 */
1217 case SA_STEPSTATE_COLLECT:
1218
1219 while (sa->inspaceok && sa->needsmoreitems
1220 && (PICO_OK ==
1221 (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
1222 PICOSA_MAXITEMSIZE, &blen)))) {
1223 rvP = picodata_get_itemparts(sa->tmpbuf,
1224 PICOSA_MAXITEMSIZE,
1225 &(sa->headx[sa->headxLen].head),
1226 &(sa->cbuf1[sa->cbuf1Len]),
1227 sa->cbuf1BufSize-sa->cbuf1Len,
1228 &clen);
1229 if (rvP != PICO_OK) {
1230 PICODBG_ERROR(("problem getting item parts"));
1231 picoos_emRaiseException(this->common->em, rvP,
1232 NULL, NULL);
1233 return PICODATA_PU_ERROR;
1234 }
1235
1236 /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1237 construct PUNC-FLUSH item in headx */
1238 if ((sa->headx[sa->headxLen].head.type ==
1239 PICODATA_ITEM_CMD) &&
1240 (sa->headx[sa->headxLen].head.info1 ==
1241 PICODATA_ITEMINFO1_CMD_FLUSH)) {
1242 sa->headx[sa->headxLen].head.type =
1243 PICODATA_ITEM_PUNC;
1244 sa->headx[sa->headxLen].head.info1 =
1245 PICODATA_ITEMINFO1_PUNC_FLUSH;
1246 sa->headx[sa->headxLen].head.info2 =
1247 PICODATA_ITEMINFO2_PUNC_SENT_T;
1248 sa->headx[sa->headxLen].head.len = 0;
1249 }
1250
1251 /* convert opening phoneme command to WORDPHON
1252 * and assign user-POS XX to it (Bug 432) */
1253 sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1254 /* maybe overwritten later */
1255 if ((sa->headx[sa->headxLen].head.type ==
1256 PICODATA_ITEM_CMD) &&
1257 (sa->headx[sa->headxLen].head.info1 ==
1258 PICODATA_ITEMINFO1_CMD_PHONEME)&&
1259 (sa->headx[sa->headxLen].head.info2 ==
1260 PICODATA_ITEMINFO2_CMD_START)) {
1261 picoos_uint8 i;
1262 picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
1263 PICODBG_INFO(("wordsep id is %i",wordsep));
1264 sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
1265 sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
1266 sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
1267 /* cut off additional words */
1268 i = 0;
1269 while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
1270 PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
1271
1272 i++;
1273 }
1274 if (i < sa->headx[sa->headxLen].head.len) {
1275 PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
1276 sa->headx[sa->headxLen].head.len = i;
1277 }
1278 }
1279
1280 /* check/set needsmoreitems */
1281 if (sa->headx[sa->headxLen].head.type ==
1282 PICODATA_ITEM_PUNC) {
1283 sa->needsmoreitems = FALSE;
1284 }
1285
1286 /* check/set inspaceok, keep spare slot for forcing */
1287 if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
1288 ((sa->cbuf1BufSize - sa->cbuf1Len) <
1289 PICOSA_MAXITEMSIZE)) {
1290 sa->inspaceok = FALSE;
1291 }
1292
1293 if (clen > 0) {
1294 sa->headx[sa->headxLen].cind = sa->cbuf1Len;
1295 sa->cbuf1Len += clen;
1296 } else {
1297 sa->headx[sa->headxLen].cind = 0;
1298 }
1299 sa->headxLen++;
1300 }
1301
1302 if (!sa->needsmoreitems) {
1303 /* 1, phrase buffered */
1304 sa->procState = SA_STEPSTATE_PROCESS_POSD;
1305 return PICODATA_PU_ATOMIC;
1306 } else if (!sa->inspaceok) {
1307 /* 2, forced phrase end */
1308 /* at least one slot is still free, use it to
1309 force a trailing PUNC item */
1310 sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
1311 sa->headx[sa->headxLen].head.info1 =
1312 PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1313 sa->headx[sa->headxLen].head.info2 =
1314 PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1315 sa->headx[sa->headxLen].head.len = 0;
1316 sa->needsmoreitems = FALSE; /* not really needed for now */
1317 sa->headxLen++;
1318 PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1319 picoos_emRaiseWarning(this->common->em,
1320 PICO_WARN_FALLBACK, NULL,
1321 (picoos_char *)"forced phrase end");
1322 sa->procState = SA_STEPSTATE_PROCESS_POSD;
1323 return PICODATA_PU_ATOMIC;
1324 } else if (rv == PICO_EOF) {
1325 /* 3, 4 */
1326 return PICODATA_PU_IDLE;
1327 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1328 (rv == PICO_EXC_BUF_OVERFLOW)) {
1329 /* error, no valid item in cb (UNDER) */
1330 /* or tmpbuf not large enough, not possible (OVER) */
1331 /* no exception raised, left for ctrl to handle */
1332 PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1333 return PICODATA_PU_ERROR;
1334 } else {
1335 /* error, only possible if cbGetItem implementation
1336 changes without this function being adapted*/
1337 PICODBG_ERROR(("untreated return value, rv: %d", rv));
1338 return PICODATA_PU_ERROR;
1339 }
1340 break;
1341
1342
1343 /* *********************************************************/
1344 /* process posd state: process items in headx/cbuf1
1345 * and change in place
1346 */
1347 case SA_STEPSTATE_PROCESS_POSD:
1348 /* ensure there is an item in inBuf */
1349 if (sa->headxLen > 0) {
1350 /* we have a phrase in headx, cbuf1 (can be
1351 single PUNC item without POS), do pos disamb */
1352 if (PICO_OK != saDisambPos(this, sa)) {
1353 picoos_emRaiseException(this->common->em,
1354 PICO_ERR_OTHER, NULL, NULL);
1355 return PICODATA_PU_ERROR;
1356 }
1357 sa->procState = SA_STEPSTATE_PROCESS_WPHO;
1358
1359 } else if (sa->headxLen == 0) { /* no items in inBuf */
1360 PICODBG_WARN(("no items in inBuf"));
1361 sa->procState = SA_STEPSTATE_COLLECT;
1362 return PICODATA_PU_BUSY;
1363 }
1364
1365 #if defined (PICO_DEBUG)
1366 if (1) {
1367 picoos_uint8 i, j, ittype;
1368 for (i = 0; i < sa->headxLen; i++) {
1369 ittype = sa->headx[i].head.type;
1370 PICODBG_INFO_CTX();
1371 PICODBG_INFO_MSG(("sa-d: ("));
1372 PICODBG_INFO_MSG(("'%c',", ittype));
1373 if ((32 <= sa->headx[i].head.info1) &&
1374 (sa->headx[i].head.info1 < 127) &&
1375 (ittype != PICODATA_ITEM_WORDGRAPH) &&
1376 (ittype != PICODATA_ITEM_WORDINDEX)) {
1377 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1378 } else {
1379 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1380 }
1381 if ((32 <= sa->headx[i].head.info2) &&
1382 (sa->headx[i].head.info2 < 127)) {
1383 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1384 } else {
1385 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1386 }
1387 PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1388
1389 for (j = 0; j < sa->headx[i].head.len; j++) {
1390 if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
1391 (ittype == PICODATA_ITEM_CMD)) {
1392 PICODBG_INFO_MSG(("%c",
1393 sa->cbuf1[sa->headx[i].cind+j]));
1394 } else {
1395 PICODBG_INFO_MSG(("%4d",
1396 sa->cbuf1[sa->headx[i].cind+j]));
1397 }
1398 }
1399 PICODBG_INFO_MSG(("\n"));
1400 }
1401 }
1402 #endif
1403
1404 break;
1405
1406
1407 /* *********************************************************/
1408 /* process wpho state: process items in headx/cbuf1 and modify
1409 * headx in place and fill cbuf2
1410 */
1411 case SA_STEPSTATE_PROCESS_WPHO:
1412 /* ensure there is an item in inBuf */
1413 if (sa->headxLen > 0) {
1414 /* we have a phrase in headx, cbuf1 (can be single
1415 PUNC item), do lex lookup, g2p, or copy */
1416
1417 /* check if cbuf2 is empty as it should be */
1418 if (sa->cbuf2Len > 0) {
1419 /* enforce emptyness */
1420 PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
1421 picoos_emRaiseWarning(this->common->em,
1422 PICO_WARN_PU_DISCARD_BUF,
1423 NULL, NULL);
1424 }
1425
1426 /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
1427 saCopyItem*, phones skipped if needed */
1428 for (i = 0; i < sa->headxLen; i++) {
1429 switch (sa->headx[i].head.type) {
1430 case PICODATA_ITEM_WORDGRAPH:
1431 if (PICO_OK != saGraphemeToPhoneme(this, sa,
1432 i)) {
1433 /* not possible, phones skipped if needed */
1434 picoos_emRaiseException(this->common->em,
1435 PICO_ERR_OTHER,
1436 NULL, NULL);
1437 return PICODATA_PU_ERROR;
1438 }
1439 break;
1440 case PICODATA_ITEM_WORDINDEX:
1441 if (0 == sa->headx[i].head.info2) {
1442 lex = sa->lex;
1443 } else {
1444 lex = sa->ulex[sa->headx[i].head.info2-1];
1445 }
1446 if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
1447 /* not possible, phones skipped if needed */
1448 picoos_emRaiseException(this->common->em,
1449 PICO_ERR_OTHER,
1450 NULL, NULL);
1451 return PICODATA_PU_ERROR;
1452 }
1453 break;
1454 default:
1455 /* copy item unmodified, ie. headx untouched,
1456 content from cbuf1 to cbuf2 */
1457 if (PICO_OK != saCopyItemContent1to2(this, sa,
1458 i)) {
1459 /* not possible, phones skipped if needed */
1460 picoos_emRaiseException(this->common->em,
1461 PICO_ERR_OTHER,
1462 NULL, NULL);
1463 return PICODATA_PU_ERROR;
1464 }
1465 break;
1466 }
1467 }
1468 /* set cbuf1 to empty */
1469 sa->cbuf1Len = 0;
1470 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1471
1472 } else if (sa->headxLen == 0) { /* no items in inBuf */
1473 PICODBG_WARN(("no items in inBuf"));
1474 sa->procState = SA_STEPSTATE_COLLECT;
1475 return PICODATA_PU_BUSY;
1476 }
1477
1478 #if defined (PICO_DEBUG)
1479 if (1) {
1480 picoos_uint8 i, j, ittype;
1481 for (i = 0; i < sa->headxLen; i++) {
1482 ittype = sa->headx[i].head.type;
1483 PICODBG_INFO_CTX();
1484 PICODBG_INFO_MSG(("sa-g: ("));
1485 PICODBG_INFO_MSG(("'%c',", ittype));
1486 if ((32 <= sa->headx[i].head.info1) &&
1487 (sa->headx[i].head.info1 < 127) &&
1488 (ittype != PICODATA_ITEM_WORDPHON)) {
1489 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
1490 } else {
1491 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
1492 }
1493 if ((32 <= sa->headx[i].head.info2) &&
1494 (sa->headx[i].head.info2 < 127)) {
1495 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
1496 } else {
1497 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
1498 }
1499 PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
1500
1501 for (j = 0; j < sa->headx[i].head.len; j++) {
1502 if ((ittype == PICODATA_ITEM_CMD)) {
1503 PICODBG_INFO_MSG(("%c",
1504 sa->cbuf2[sa->headx[i].cind+j]));
1505 } else {
1506 PICODBG_INFO_MSG(("%4d",
1507 sa->cbuf2[sa->headx[i].cind+j]));
1508 }
1509 }
1510 PICODBG_INFO_MSG(("\n"));
1511 }
1512 }
1513 #endif
1514
1515 break;
1516
1517
1518 /* *********************************************************/
1519 /* transduction parse state: extract phonemes of item in internal outBuf */
1520 case SA_STEPSTATE_PROCESS_TRNS_PARSE:
1521
1522 PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
1523 sa->headxBottom, sa->headxLen));
1524
1525 /* check for termination condition first */
1526 if (0 == sa->headxLen) {
1527 /* reset headx, cbuf2 */
1528 sa->headxBottom = 0;
1529 sa->cbuf2Len = 0;
1530 /* reset collect state support variables */
1531 sa->inspaceok = TRUE;
1532 sa->needsmoreitems = TRUE;
1533
1534 sa->procState = SA_STEPSTATE_COLLECT;
1535 return PICODATA_PU_BUSY;
1536 }
1537
1538 sa->procState = SA_STEPSTATE_FEED;
1539 /* copy item unmodified */
1540 rv = picodata_put_itemparts(
1541 &(sa->headx[sa->headxBottom].head),
1542 &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
1543 sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
1544 PICOSA_MAXITEMSIZE, &blen);
1545
1546 if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
1547 PICODBG_DEBUG(("PARSE found WORDPHON"));
1548 rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
1549 &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
1550 if (PICO_OK == rv) {
1551 PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
1552 sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
1553 } else {
1554 PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
1555 }
1556 } else {
1557 PICODBG_DEBUG(("PARSE found other item, just copying"));
1558 }
1559 if (SA_STEPSTATE_FEED == sa->procState) {
1560 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1561 (picoos_uint8 *)"sa-p: ",
1562 sa->tmpbuf, PICOSA_MAXITEMSIZE);
1563
1564 }
1565
1566 /* consume item */
1567 sa->headxBottom++;
1568 sa->headxLen--;
1569
1570 break;
1571
1572 /* *********************************************************/
1573 /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
1574 case SA_STEPSTATE_PROCESS_TRNS_FST:
1575
1576
1577
1578
1579
1580 /* if no word-level FSTs: doing trivial syllabification instead */
1581 if (0 == sa->numFsts) {
1582 PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
1583 #if defined(PICO_DEBUG)
1584 {
1585 PICODBG_INFO_CTX();
1586 PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
1587 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1588 PICODBG_INFO_MSG(("\n"));
1589 }
1590 #endif
1591
1592 picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
1593 sa->phonWritePos, sa->phonBufOut,
1594 &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1595 PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
1596 #if defined(PICO_DEBUG)
1597 {
1598 PICODBG_INFO_CTX();
1599 PICODBG_INFO_MSG(("sa returned from syllabification: "));
1600 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1601 PICODBG_INFO_MSG(("\n"));
1602 }
1603 #endif
1604
1605 /* eliminate deep epsilons */
1606 PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
1607 picotrns_eliminate_epsilons(sa->phonBufOut,
1608 sa->phonWritePos, sa->phonBuf,
1609 &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1610 PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
1611 sa->phonReadPos = 0;
1612 sa->phonesTransduced = 1;
1613 sa->procState = SA_STEPSTATE_FEED;
1614 break;
1615 }
1616
1617 /* there are word-level FSTs */
1618 /* termination condition first */
1619 if (sa->curFst >= sa->numFsts) {
1620 /* reset for next transduction */
1621 sa->curFst = 0;
1622 sa->phonReadPos = 0;
1623 sa->phonesTransduced = 1;
1624 sa->procState = SA_STEPSTATE_FEED;
1625 break;
1626 }
1627
1628 /* transduce from phonBufIn to PhonBufOut */
1629 {
1630
1631 picoos_uint32 nrSteps;
1632 #if defined(PICO_DEBUG)
1633 {
1634 PICODBG_INFO_CTX();
1635 PICODBG_INFO_MSG(("sa trying to transduce: "));
1636 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
1637 PICODBG_INFO_MSG(("\n"));
1638 }
1639 #endif
1640 picotrns_transduce(sa->fst[sa->curFst], FALSE,
1641 picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
1642 &sa->phonWritePos,
1643 PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
1644 sa->maxAltDescLen, &nrSteps);
1645 #if defined(PICO_DEBUG)
1646 {
1647 PICODBG_INFO_CTX();
1648 PICODBG_INFO_MSG(("sa returned from transduction: "));
1649 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
1650 PICODBG_INFO_MSG(("\n"));
1651 }
1652 #endif
1653 }
1654
1655
1656
1657 /*
1658 The trasduction output will contain equivalent items i.e. (x,y') for each (x,y) plus inserted deep symbols (-1,d).
1659 In case of deletions, (x,0) might also be omitted...
1660 */
1661 /* eliminate deep epsilons */
1662 picotrns_eliminate_epsilons(sa->phonBufOut,
1663 sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
1664 sa->phonesTransduced = 1;
1665
1666 sa->curFst++;
1667
1668 return PICODATA_PU_ATOMIC;
1669 /* break; */
1670
1671 /* *********************************************************/
1672 /* feed state: copy item in internal outBuf to output charBuf */
1673
1674 case SA_STEPSTATE_FEED:
1675
1676 PICODBG_DEBUG(("FEED"));
1677
1678 if (sa->phonesTransduced) {
1679 /* replace original phones by transduced */
1680 picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
1681 picoos_uint8 plane;
1682 picoos_int16 sym, pos;
1683 while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
1684 PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
1685 sym = picotrns_unplane(sym, &plane);
1686 PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
1687 sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
1688 }
1689 PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
1690 picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
1691 if (SA_POSSYM_INVALID == rv) {
1692 PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
1693 return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
1694 }
1695 sa->phonesTransduced = 0;
1696
1697 } /* if (sa->phonesTransduced) */
1698
1699
1700 rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
1701 PICOSA_MAXITEMSIZE, &clen);
1702
1703 *numBytesOutput += clen;
1704
1705 PICODBG_DEBUG(("put item, status: %d", rvP));
1706
1707 if (rvP == PICO_OK) {
1708 } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1709 /* try again next time */
1710 PICODBG_DEBUG(("feeding overflow"));
1711 return PICODATA_PU_OUT_FULL;
1712 } else {
1713 /* error, should never happen */
1714 PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1715 return PICODATA_PU_ERROR;
1716 }
1717
1718 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1719 (picoos_uint8 *)"sana: ",
1720 sa->tmpbuf, PICOSA_MAXITEMSIZE);
1721
1722 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
1723 /* return PICODATA_PU_BUSY; */
1724 break;
1725
1726 default:
1727 break;
1728 } /* switch */
1729
1730 } /* while */
1731
1732 /* should be never reached */
1733 PICODBG_ERROR(("reached end of function"));
1734 picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1735 return PICODATA_PU_ERROR;
1736 }
1737
1738 #ifdef __cplusplus
1739 }
1740 #endif
1741
1742
1743 /* end */
1744