1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picoacph.c
18 *
19 * accentuation and phrasing
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picodata.h"
33 #include "picoacph.h"
34 #include "picokdt.h"
35 #include "picoklex.h"
36 #include "picoktab.h"
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 #if 0
42 }
43 #endif
44
/* Values taken by acph_subobj_t.procState: the processing phase the
   acph step function is currently in. */
#define SA_STEPSTATE_COLLECT      0
#define SA_STEPSTATE_PROCESS_PHR 12
#define SA_STEPSTATE_PROCESS_ACC 13
#define SA_STEPSTATE_FEED         2


/* Values taken by acph_subobj_t.boundStrengthState: the kind of separator
   that closed the previously processed primary phrase. */
#define SA_BOUNDSTRENGTH_SSEP 0 /* sentence separator */
#define SA_BOUNDSTRENGTH_PPHR 1 /* primary phrase separator */
55
56
57 /* subobject : AccPhrUnit
58 * shortcut : acph
59 * context size : one phrase, max. 30 non-PUNC items, for non-processed items
60 * one item if internal input empty
61 */
62
63 /**
64 * @addtogroup picoacph
65 *
66 * <b> Pico Accentuation and Phrasing </b>\n
67 *
68 internal buffers:
69
70 - headx : array for extended item heads of fixed size (head plus
71 index for content, plus two fields for boundary strength/type)
72 - cbuf : buffer for item contents (referenced by index in
73 headx).
74
75 0. bottom up filling of items in headx and cbuf
76
77 1. phrasing (right-to-left):
78
79 e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH \n
80 e.g. to BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM \n
81 |1 |2 |3 |4 \n
82
83 2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
almost full), with the trailing PUNC item included (last item).\n
If the trailing PUNC is a primary phrase separator, the
86 item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
87 be output at the start of the next primary phrase.\n
88 Otherwise,
89 the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
90 so that a BOUND of type SBEG is output at the start of the next primary phrase.
91
92 trailing PUNC item bound states \n
93 SSEP PPHR \n
94 PUNC(SENTEND, X) B(B,X)>SSEP B(P1,X)>SSEP (X = T | Q | E) \n
95 PUNC(FLUSH, T) B(B,T)>SSEP* B(P1,T)>SSEP \n
96 PUNC(PHRASEEND, P) B(B,P)>PPHR B(P1,P)>PPHR \n
97 PUNC(PHRASEEND, FORC) B(B,P)>PPHR B(P1,P)>PPHR \n
98
99 If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
100 all but the first will be treated as an (empty) phrase containing just this item.
101 If this (single) item is a flush, creation of SBEG is suppressed.
102
103
104 - dtphr phrasing tree ("subphrasing")
105 determines
106 - BOUND_PHR2
107 - BOUND_PHR3
- boundary strengths are determined for every word (except the
109 first one) from right-to-left. The boundary types mark the phrase
110 type of the phrase following the boundary.
111 - number of items actually changed (new BOUND items added): because
112 of fixed size without content, two fields are contained in headx
113 to indicate if a BOUND needs to be added to the LEFT of the item.
114 -> headx further extended with boundary strength and type info to
115 indicate that to the left of the headx ele a BOUND needs to be
116 inserted when outputting.
117
118 2. accentuation:
119 - number of items unchanged, content unchanged, only head info changes
120 -> changed in place in headx
121 */
122
123
124 typedef struct {
125 picodata_itemhead_t head;
126 picoos_uint16 cind;
127 picoos_uint8 boundstrength; /* bstrength to the left, 0 if not set */
128 picoos_uint8 boundtype; /* btype for following phrase, 0 if not set */
129 } picoacph_headx_t;
130
131
132 typedef struct acph_subobj {
133 picoos_uint8 procState; /* for next processing step decision */
134 picoos_uint8 boundStrengthState; /* boundary strength state */
135
136 picoos_uint8 inspaceok; /* flag: headx/cbuf has space for an item */
137 picoos_uint8 needsmoreitems; /* flag: need more items */
138
139 picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. location for an item */
140
141 picoacph_headx_t headx[PICOACPH_MAXNR_HEADX];
142 picoos_uint16 headxBottom; /* bottom */
143 picoos_uint16 headxLen; /* length, 0 if empty */
144
145 picoos_uint8 cbuf[PICOACPH_MAXSIZE_CBUF];
146 picoos_uint16 cbufBufSize; /* actually allocated size */
147 picoos_uint16 cbufLen; /* length, 0 if empty */
148
149 /* tab knowledge base */
150 picoktab_Phones tabphones;
151
152 /* dtphr knowledge base */
153 picokdt_DtPHR dtphr;
154
155 /* dtacc knowledge base */
156 picokdt_DtACC dtacc;
157 } acph_subobj_t;
158
159
acphInitialize(register picodata_ProcessingUnit this,picoos_int32 resetMode)160 static pico_status_t acphInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
161 acph_subobj_t * acph;
162 picoos_uint16 i;
163
164 PICODBG_DEBUG(("calling"));
165
166 if (NULL == this || NULL == this->subObj) {
167 return picoos_emRaiseException(this->common->em,
168 PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
169 }
170 acph = (acph_subobj_t *) this->subObj;
171 acph->procState = SA_STEPSTATE_COLLECT;
172 acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
173
174 acph->inspaceok = TRUE;
175 acph->needsmoreitems = TRUE;
176
177 acph->headxBottom = 0;
178 acph->headxLen = 0;
179 acph->cbufBufSize = PICOACPH_MAXSIZE_CBUF;
180 acph->cbufLen = 0;
181
182 /* init headx, cbuf */
183 for (i = 0; i < PICOACPH_MAXNR_HEADX; i++){
184 acph->headx[i].head.type = 0;
185 acph->headx[i].head.info1 = 0;
186 acph->headx[i].head.info2 = 0;
187 acph->headx[i].head.len = 0;
188 acph->headx[i].cind = 0;
189 acph->headx[i].boundstrength = 0;
190 acph->headx[i].boundtype = 0;
191 }
192 for (i = 0; i < PICOACPH_MAXSIZE_CBUF; i++) {
193 acph->cbuf[i] = 0;
194 }
195
196 if (resetMode == PICO_RESET_SOFT) {
197 /*following initializations needed only at startup or after a full reset*/
198 return PICO_OK;
199 }
200
201 /* kb tabphones */
202 acph->tabphones =
203 picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
204 if (acph->tabphones == NULL) {
205 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
206 NULL, NULL);
207 }
208 PICODBG_DEBUG(("got tabphones"));
209
210 #ifdef PICO_DEBUG_1
211 {
212 picoos_uint16 itmp;
213 for (itmp = 0; itmp < 256; itmp++) {
214 if (picoktab_hasVowelProp(acph->tabphones, itmp)) {
215 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
216 }
217 if (picoktab_hasDiphthProp(acph->tabphones, itmp)) {
218 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
219 }
220 if (picoktab_hasGlottProp(acph->tabphones, itmp)) {
221 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
222 }
223 if (picoktab_hasNonsyllvowelProp(acph->tabphones, itmp)) {
224 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
225 }
226 if (picoktab_hasSyllconsProp(acph->tabphones, itmp)) {
227 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
228 }
229
230 if (picoktab_isPrimstress(acph->tabphones, itmp)) {
231 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
232 }
233 if (picoktab_isSecstress(acph->tabphones, itmp)) {
234 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
235 }
236 if (picoktab_isSyllbound(acph->tabphones, itmp)) {
237 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
238 }
239 if (picoktab_isPause(acph->tabphones, itmp)) {
240 PICODBG_DEBUG(("tabphones isPause: %d", itmp));
241 }
242 }
243
244 PICODBG_DEBUG(("tabphones primstressID: %d",
245 picoktab_getPrimstressID(acph->tabphones)));
246 PICODBG_DEBUG(("tabphones secstressID: %d",
247 picoktab_getSecstressID(acph->tabphones)));
248 PICODBG_DEBUG(("tabphones syllboundID: %d",
249 picoktab_getSyllboundID(acph->tabphones)));
250 PICODBG_DEBUG(("tabphones pauseID: %d",
251 picoktab_getPauseID(acph->tabphones)));
252 }
253 #endif
254
255
256 /* kb dtphr */
257 acph->dtphr = picokdt_getDtPHR(this->voice->kbArray[PICOKNOW_KBID_DT_PHR]);
258 if (acph->dtphr == NULL) {
259 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
260 NULL, NULL);
261 }
262 PICODBG_DEBUG(("got dtphr"));
263
264 /* kb dtacc */
265 acph->dtacc = picokdt_getDtACC(this->voice->kbArray[PICOKNOW_KBID_DT_ACC]);
266 if (acph->dtacc == NULL) {
267 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
268 NULL, NULL);
269 }
270 PICODBG_DEBUG(("got dtacc"));
271
272 return PICO_OK;
273 }
274
275 static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
276 picoos_int16 mode,
277 picoos_uint16 *numBytesOutput);
278
acphTerminate(register picodata_ProcessingUnit this)279 static pico_status_t acphTerminate(register picodata_ProcessingUnit this)
280 {
281 return PICO_OK;
282 }
283
acphSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)284 static pico_status_t acphSubObjDeallocate(register picodata_ProcessingUnit this,
285 picoos_MemoryManager mm) {
286 mm = mm; /* avoid warning "var not used in this function"*/
287 if (NULL != this) {
288 picoos_deallocate(this->common->mm, (void *) &this->subObj);
289 }
290 return PICO_OK;
291 }
292
293
picoacph_newAccPhrUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)294 picodata_ProcessingUnit picoacph_newAccPhrUnit(picoos_MemoryManager mm,
295 picoos_Common common,
296 picodata_CharBuffer cbIn,
297 picodata_CharBuffer cbOut,
298 picorsrc_Voice voice) {
299 picodata_ProcessingUnit this;
300
301 this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
302 if (this == NULL) {
303 return NULL;
304 }
305
306 this->initialize = acphInitialize;
307 PICODBG_DEBUG(("set this->step to acphStep"));
308 this->step = acphStep;
309 this->terminate = acphTerminate;
310 this->subDeallocate = acphSubObjDeallocate;
311 this->subObj = picoos_allocate(mm, sizeof(acph_subobj_t));
312 if (this->subObj == NULL) {
313 picoos_deallocate(mm, (void *)&this);
314 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
315 return NULL;
316 }
317
318 acphInitialize(this, PICO_RESET_FULL);
319 return this;
320 }
321
322
323 /* ***********************************************************************/
324 /* PROCESS_PHR/ACC support functions */
325 /* ***********************************************************************/
326
327
acphGetNrSylls(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind)328 static picoos_uint8 acphGetNrSylls(register picodata_ProcessingUnit this,
329 register acph_subobj_t *acph,
330 const picoos_uint16 ind) {
331 picoos_uint8 i;
332 picoos_uint8 ch;
333 picoos_uint8 count;
334
335 count = 1;
336 for (i = 0; i < acph->headx[ind].head.len; i++) {
337 ch = acph->cbuf[acph->headx[ind].cind + i];
338 if (picoktab_isSyllbound(acph->tabphones, ch)) {
339 count++;
340 }
341 }
342 return count;
343 }
344
345
346 /* ***********************************************************************/
347 /* PROCESS_PHR functions */
348 /* ***********************************************************************/
349
350
351 /* find next POS to the left of 'ind' and return its POS and index */
acphPhrItemSeqGetPosLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * leftind)352 static picoos_uint8 acphPhrItemSeqGetPosLeft(register picodata_ProcessingUnit this,
353 register acph_subobj_t *acph,
354 const picoos_uint16 ind,
355 picoos_uint16 *leftind) {
356 picoos_uint8 val;
357 picoos_int32 i;
358
359 val = PICOKDT_EPSILON;
360 for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
361 if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
362 val = acph->headx[i].head.info1;
363 }
364 }
365 *leftind = i + 1;
366 return val;
367 }
368
369
370 /* right-to-left, for each WORDPHON do phr */
acphSubPhrasing(register picodata_ProcessingUnit this,register acph_subobj_t * acph)371 static pico_status_t acphSubPhrasing(register picodata_ProcessingUnit this,
372 register acph_subobj_t *acph) {
373 picokdt_classify_result_t dtres;
374 picoos_uint8 valbuf[5];
375 picoos_uint16 nrwordspre;
376 picoos_uint16 nrwordsfol;
377 picoos_uint16 nrsyllsfol;
378 picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
379 picoos_uint8 curpos; /* POS(es) of current word */
380 picoos_uint16 upbound; /* index of last WORDPHON item (with POS) */
381 picoos_uint8 okay;
382 picoos_uint8 nosubphrases;
383 picoos_int32 i;
384
385 /* set initial values */
386 okay = TRUE;
387 nosubphrases = TRUE;
388 curpos = PICOKDT_EPSILON; /* needs to be in 2^8 */
389
390 /* set upbound to last WORDPHON, don't worry about first one */
391 upbound = acph->headxLen - 1;
392 while ((upbound > 0) &&
393 (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
394 upbound--;
395 }
396
397 /* zero or one WORDPHON, no subphrasing needed, but handling of
398 BOUND strength state is needed */
399 if (upbound <= 0) {
400 /* phrase not containing more than one WORDPHON */
401 PICODBG_DEBUG(("less than two WORDPHON in phrase -> no subphrasing"));
402 }
403
404 lastprev2 = upbound;
405
406 /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
407 nrwordsfol = 0;
408 nrsyllsfol = 0;
409 nrwordspre = 0;
410 for (i = 0; i < upbound; i++) {
411 if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
412 nrwordspre++;
413 }
414 }
415
416 nrwordspre++; /* because we later have a decrement before being used */
417
418
419 /* set POS of current word in valbuf[1], will be shifted right afterwards */
420 valbuf[1] = acph->headx[upbound].head.info1;
421 /* find first POS to the left and set valbuf[0] */
422 valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
423 for (i = 2; i < 5; i++) {
424 valbuf[i] = PICOKDT_EPSILON;
425 }
426
427 PICODBG_TRACE(("headxLen: %d", acph->headxLen));
428
429 /* at least two WORDPHON items */
430 /* process from right-to-left all items in headx, except for 1st WORDPHON */
431 for (i = upbound; (i > 0) && (nrwordspre > 1); i--) {
432 okay = TRUE;
433
434 PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
435
436 /* if not (WORDPHON) */
437 if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
438 continue;
439 }
440
441 PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
442
443 /* get and set POS of current item, must be WORDPHON */
444 curpos = acph->headx[i].head.info1;
445
446 /* no continue so far => at [i] we have a WORDPHON item */
447 /* shift all POS elements one position to the right */
448 valbuf[4] = valbuf[3];
449 valbuf[3] = valbuf[2];
450 valbuf[2] = valbuf[1];
451 valbuf[1] = valbuf[0];
452 /* find next POS to the left and set valbuf[0] */
453 valbuf[0] = acphPhrItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
454
455 /* better check double than never */
456 if (curpos != valbuf[2]) {
457 PICODBG_WARN(("syncing POS"));
458 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
459 NULL, NULL);
460 valbuf[2] = curpos;
461 }
462
463 nrwordsfol++;
464 nrsyllsfol += acphGetNrSylls(this, acph, i);
465 nrwordspre--;
466
467 PICODBG_TRACE(("%d: [%d,%d|%d|%d,%d|%d,%d,%d]",
468 i, valbuf[0], valbuf[1], valbuf[2], valbuf[3],
469 valbuf[4], nrwordspre, nrwordsfol, nrsyllsfol));
470
471 /* no continue so far => subphrasing needed */
472 /* construct input vector, which is set in dtphr */
473 if (!picokdt_dtPHRconstructInVec(acph->dtphr, valbuf[0], valbuf[1],
474 valbuf[2], valbuf[3], valbuf[4],
475 nrwordspre, nrwordsfol, nrsyllsfol)) {
476 /* error constructing invec */
477 PICODBG_WARN(("problem with invec"));
478 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
479 NULL, NULL);
480 okay = FALSE;
481 }
482 /* classify */
483 if (okay && (!picokdt_dtPHRclassify(acph->dtphr))) {
484 /* error doing classification */
485 PICODBG_WARN(("problem classifying"));
486 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
487 NULL, NULL);
488 okay = FALSE;
489 }
490 /* decompose */
491 if (okay && (!picokdt_dtPHRdecomposeOutClass(acph->dtphr, &dtres))) {
492 /* error decomposing */
493 PICODBG_WARN(("problem decomposing"));
494 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
495 NULL, NULL);
496 okay = FALSE;
497 }
498
499 if (okay && dtres.set) {
500 PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
501 } else {
502 PICODBG_WARN(("problem determining subphrase boundary strength"));
503 dtres.class = PICODATA_ITEMINFO1_ERR;
504 }
505
506 if (dtres.class > 255) {
507 PICODBG_WARN(("dt class outside valid range, setting to PHR0"));
508 dtres.class = PICODATA_ITEMINFO1_BOUND_PHR0;
509 }
510 acph->headx[i].boundstrength = (picoos_uint8)dtres.class;
511 if ((dtres.class == PICODATA_ITEMINFO1_BOUND_PHR2) ||
512 (dtres.class == PICODATA_ITEMINFO1_BOUND_PHR3)) {
513 if (nosubphrases) {
514 /* it's the last secondary phrase in the primary phrase */
515 /* add type info */
516 switch (acph->headx[acph->headxLen - 1].head.info2) {
517 case PICODATA_ITEMINFO2_PUNC_SENT_T:
518 acph->headx[i].boundtype =
519 PICODATA_ITEMINFO2_BOUNDTYPE_T;
520 break;
521 case PICODATA_ITEMINFO2_PUNC_SENT_Q:
522 acph->headx[i].boundtype =
523 PICODATA_ITEMINFO2_BOUNDTYPE_Q;
524 break;
525 case PICODATA_ITEMINFO2_PUNC_SENT_E:
526 acph->headx[i].boundtype =
527 PICODATA_ITEMINFO2_BOUNDTYPE_E;
528 break;
529 case PICODATA_ITEMINFO2_PUNC_PHRASE:
530 case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
531 acph->headx[i].boundtype =
532 PICODATA_ITEMINFO2_BOUNDTYPE_P;
533 break;
534 default:
535 PICODBG_WARN(("invalid boundary type, not set"));
536 break;
537 }
538 nosubphrases = FALSE;
539
540 } else {
541 acph->headx[i].boundtype =
542 PICODATA_ITEMINFO2_BOUNDTYPE_P;
543 }
544 /* reset nr following words and sylls counters */
545 nrwordsfol = 0;
546 nrsyllsfol = 0;
547 }
548 }
549
550 /* process first item, add bound-info */
551 switch (acph->boundStrengthState) {
552 case SA_BOUNDSTRENGTH_SSEP:
553 acph->headx[0].boundstrength =
554 PICODATA_ITEMINFO1_BOUND_SBEG;
555 break;
556 case SA_BOUNDSTRENGTH_PPHR:
557 acph->headx[0].boundstrength =
558 PICODATA_ITEMINFO1_BOUND_PHR1;
559 break;
560 default:
561 PICODBG_WARN(("invalid boundary strength, not set"));
562 break;
563 }
564
565 /* set boundary strength state */
566 switch (acph->headx[acph->headxLen - 1].head.info1) {
567 case PICODATA_ITEMINFO1_PUNC_SENTEND:
568 case PICODATA_ITEMINFO1_PUNC_FLUSH:
569 acph->boundStrengthState = SA_BOUNDSTRENGTH_SSEP;
570 break;
571 case PICODATA_ITEMINFO1_PUNC_PHRASEEND:
572 acph->boundStrengthState = SA_BOUNDSTRENGTH_PPHR;
573 break;
574 default:
575 PICODBG_WARN(("invalid boundary strength state, not changed"));
576 break;
577 }
578
579 if (nosubphrases) {
580 /* process first item, add type info */
581 switch (acph->headx[acph->headxLen - 1].head.info2) {
582 case PICODATA_ITEMINFO2_PUNC_SENT_T:
583 acph->headx[0].boundtype =
584 PICODATA_ITEMINFO2_BOUNDTYPE_T;
585 break;
586 case PICODATA_ITEMINFO2_PUNC_SENT_Q:
587 acph->headx[0].boundtype =
588 PICODATA_ITEMINFO2_BOUNDTYPE_Q;
589 break;
590 case PICODATA_ITEMINFO2_PUNC_SENT_E:
591 acph->headx[0].boundtype =
592 PICODATA_ITEMINFO2_BOUNDTYPE_E;
593 break;
594 case PICODATA_ITEMINFO2_PUNC_PHRASE:
595 case PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED:
596 acph->headx[0].boundtype =
597 PICODATA_ITEMINFO2_BOUNDTYPE_P;
598 break;
599 default:
600 PICODBG_WARN(("invalid boundary type, not set"));
601 break;
602 }
603 } else {
604 acph->headx[0].boundtype =
605 PICODATA_ITEMINFO2_BOUNDTYPE_P;
606 }
607
608 return PICO_OK;
609 }
610
611
612 /* ***********************************************************************/
613 /* PROCESS_ACC functions */
614 /* ***********************************************************************/
615
616 /* find next POS to the left of 'ind' and return its POS and index */
acphAccItemSeqGetPosLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * leftind)617 static picoos_uint8 acphAccItemSeqGetPosLeft(register picodata_ProcessingUnit this,
618 register acph_subobj_t *acph,
619 const picoos_uint16 ind,
620 picoos_uint16 *leftind) {
621 picoos_uint8 val;
622 picoos_int32 i;
623
624 val = PICOKDT_EPSILON;
625 for (i = ind - 1; ((val == PICOKDT_EPSILON) && (i >= 0)); i--) {
626 if ((acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
627 val = acph->headx[i].head.info1;
628 }
629 }
630 *leftind = i + 1;
631 return val;
632 }
633
634
635 /* s1: nr sylls in word before the first primary stressed syll,
636 s2: nr sylls in word after (but excluding) the first primary stressed syll */
acphAccNrSyllParts(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint8 * s1,picoos_uint8 * s2)637 static picoos_uint8 acphAccNrSyllParts(register picodata_ProcessingUnit this,
638 register acph_subobj_t *acph,
639 const picoos_uint16 ind,
640 picoos_uint8 *s1,
641 picoos_uint8 *s2) {
642 picoos_uint16 pind;
643 picoos_uint16 pend; /* phone string start+len */
644 picoos_uint8 afterprim;
645
646 /* check ind is in valid range */
647 if (ind >= acph->headxLen) {
648 return FALSE;
649 }
650
651 *s1 = 0;
652 *s2 = 0;
653 afterprim = FALSE;
654 pend = acph->headx[ind].cind + acph->headx[ind].head.len;
655 for (pind = acph->headx[ind].cind; pind < pend; pind++) {
656 if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pind])) {
657 afterprim = TRUE;
658 } else if (picoktab_isSyllbound(acph->tabphones, acph->cbuf[pind])) {
659 if (afterprim) {
660 (*s2)++;
661 } else {
662 (*s1)++;
663 }
664 }
665 }
666 if (afterprim) {
667 (*s2)++;
668 } else {
669 (*s1)++;
670 }
671
672 /* exclude the stressed syllable */
673 if ((*s2) > 0) {
674 (*s2)--;
675 }
676 /* handle the case when there is no primstress */
677 if (!afterprim) {
678 (*s2) = (*s1);
679 }
680 return TRUE;
681 }
682
683
acphAccGetNrsRight(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * nrwordsfol,picoos_uint16 * nrsyllsfol,picoos_uint16 * footwordsfol,picoos_uint16 * footsyllsfol)684 static picoos_uint8 acphAccGetNrsRight(register picodata_ProcessingUnit this,
685 register acph_subobj_t *acph,
686 const picoos_uint16 ind,
687 picoos_uint16 *nrwordsfol,
688 picoos_uint16 *nrsyllsfol,
689 picoos_uint16 *footwordsfol,
690 picoos_uint16 *footsyllsfol) {
691 picoos_uint16 i;
692 picoos_uint8 s1;
693 picoos_uint8 s2;
694
695 if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
696 return FALSE;
697 }
698
699 *nrwordsfol = 0;
700 *nrsyllsfol = s2;
701 i = ind + 1;
702 while ((i < acph->headxLen) &&
703 (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
704 if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
705 (*nrwordsfol)++;
706 *nrsyllsfol += acphGetNrSylls(this, acph, i);
707 }
708 i++;
709 }
710
711 *footwordsfol = 0;
712 *footsyllsfol = s2;
713 i = ind + 1;
714 while ((i < acph->headxLen) &&
715 (acph->headx[i].head.info2 != PICODATA_ACC1)) {
716 if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
717 (*footwordsfol)++;
718 *footsyllsfol += acphGetNrSylls(this, acph, i);
719 }
720 i++;
721 }
722 if ((i < acph->headxLen) && (acph->headx[i].head.info2 == PICODATA_ACC1)) {
723 if (!acphAccNrSyllParts(this, acph, i, &s1, &s2)) {
724 return FALSE;
725 }
726 *footsyllsfol += s1;
727 }
728 return TRUE;
729 }
730
731
acphAccGetNrsLeft(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind,picoos_uint16 * nrwordspre,picoos_uint16 * nrsyllspre)732 static picoos_uint8 acphAccGetNrsLeft(register picodata_ProcessingUnit this,
733 register acph_subobj_t *acph,
734 const picoos_uint16 ind,
735 picoos_uint16 *nrwordspre,
736 picoos_uint16 *nrsyllspre) {
737 picoos_int32 i;
738 picoos_uint8 s1;
739 picoos_uint8 s2;
740
741 if (!acphAccNrSyllParts(this, acph, ind, &s1, &s2)) {
742 return FALSE;
743 }
744
745 *nrwordspre = 0;
746 *nrsyllspre = s1;
747 i = ind - 1;
748 while ((i >= 0) &&
749 (acph->headx[i].boundstrength == PICODATA_ITEMINFO1_BOUND_PHR0)) {
750 if (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON) {
751 (*nrwordspre)++;
752 *nrsyllspre += acphGetNrSylls(this, acph, i);
753 }
754 i--;
755 }
756
757 if ((acph->headx[i].boundstrength != PICODATA_ITEMINFO1_BOUND_PHR0) &&
758 (acph->headx[i].head.type == PICODATA_ITEM_WORDPHON)) {
759 (*nrwordspre)++;
760 *nrsyllspre += acphGetNrSylls(this, acph, i);
761 }
762 return TRUE;
763 }
764
765
766 /* return TRUE if wordphon contains no stress, FALSE otherwise */
acphIsWordWithoutStress(register picodata_ProcessingUnit this,register acph_subobj_t * acph,const picoos_uint16 ind)767 static picoos_uint8 acphIsWordWithoutStress(register picodata_ProcessingUnit this,
768 register acph_subobj_t *acph,
769 const picoos_uint16 ind) {
770 picoos_uint8 i;
771 picoos_uint16 pos;
772
773 pos = acph->headx[ind].cind;
774 for (i = 0; i < acph->headx[ind].head.len; i++) {
775 if (picoktab_isPrimstress(acph->tabphones, acph->cbuf[pos + i]) ||
776 picoktab_isSecstress(acph->tabphones, acph->cbuf[pos + i])) {
777 return FALSE;
778 }
779 }
780 return TRUE;
781 }
782
783
784 /* right-to-left, for each WORDPHON do acc */
acphAccentuation(register picodata_ProcessingUnit this,register acph_subobj_t * acph)785 static pico_status_t acphAccentuation(register picodata_ProcessingUnit this,
786 register acph_subobj_t *acph) {
787 picokdt_classify_result_t dtres;
788 picoos_uint8 valbuf[5];
789 picoos_uint16 hist1;
790 picoos_uint16 hist2;
791 picoos_uint16 nrwordspre;
792 picoos_uint16 nrsyllspre;
793 picoos_uint16 nrwordsfol;
794 picoos_uint16 nrsyllsfol;
795 picoos_uint16 footwordsfol;
796 picoos_uint16 footsyllsfol;
797 picoos_uint16 lastprev2; /* last index of POS(es) found to the left */
798 picoos_uint8 curpos; /* POS(es) of current word */
799 picoos_uint16 prevout;
800 picoos_uint8 okay;
801 picoos_int32 upbound; /* index of last WORDPHON item (with POS) */
802 picoos_uint16 i;
803
804 /* set initial values */
805 okay = TRUE;
806 curpos = PICOKDT_EPSILON; /* needs to be < 2^8 */
807
808 /* set upbound to last WORDPHON */
809 upbound = acph->headxLen - 1;
810 while ((upbound >= 0) &&
811 (acph->headx[upbound].head.type != PICODATA_ITEM_WORDPHON)) {
812 upbound--;
813 }
814
815 if (upbound < 0) {
816 /* phrase containing zero WORDPHON */
817 PICODBG_DEBUG(("no WORDPHON in phrase -> no accentuation"));
818 return PICO_OK;
819 }
820
821 lastprev2 = upbound;
822
823 /* set initial history values */
824 prevout = PICOKDT_HISTORY_ZERO;
825 hist1 = PICOKDT_HISTORY_ZERO;
826 hist2 = PICOKDT_HISTORY_ZERO;
827
828 /* set initial nr pre/fol words/sylls, upbound is ind of last WORDPHON */
829 nrwordsfol = 0;
830 nrsyllsfol = 0;
831 footwordsfol = 0;
832 footsyllsfol = 0;
833 nrwordspre = 0;
834 nrsyllspre = 0;
835
836 /* set POS of current word in valbuf[1], will be shifted right afterwards */
837 valbuf[1] = acph->headx[upbound].head.info1;
838 /* find first POS to the left and set valbuf[0] */
839 valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
840 for (i = 2; i < 5; i++) {
841 valbuf[i] = PICOKDT_EPSILON;
842 }
843
844 PICODBG_TRACE(("headxLen: %d", acph->headxLen));
845
846 /* process from right-to-left all items in headx */
847 for (i = upbound+1; i > 0; ) {
848 i--;
849
850 okay = TRUE;
851
852 PICODBG_TRACE(("iter: %d, type: %c", i, acph->headx[i].head.type));
853
854 /* if not (WORDPHON) */
855 if ((acph->headx[i].head.type != PICODATA_ITEM_WORDPHON)) {
856 continue;
857 }
858
859 PICODBG_TRACE(("iter: %d, curpos: %d", i, acph->headx[i].head.info1));
860
861 /* get and set POS of current item, must be WORDPHON */
862 curpos = acph->headx[i].head.info1;
863
864 /* no continue so far => at [i] we have a WORDPHON item */
865 /* shift all POS elements one position to the right */
866 valbuf[4] = valbuf[3];
867 valbuf[3] = valbuf[2];
868 valbuf[2] = valbuf[1];
869 valbuf[1] = valbuf[0];
870 /* find next POS to the left and set valbuf[0] */
871 valbuf[0] = acphAccItemSeqGetPosLeft(this, acph, lastprev2, &lastprev2);
872
873 /* better check double than never */
874 if (curpos != valbuf[2]) {
875 PICODBG_WARN(("syncing POS"));
876 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
877 NULL, NULL);
878 valbuf[2] = curpos;
879 }
880
881 /* set history values */
882 hist2 = hist1;
883 hist1 = prevout;
884
885 /* ************************************************************ */
886 /* many speedups possible by avoiding double calc of attribtues */
887 /* ************************************************************ */
888
889 /* get distances */
890 if ((!acphAccGetNrsRight(this, acph, i, &nrwordsfol, &nrsyllsfol,
891 &footwordsfol, &footsyllsfol)) ||
892 (!acphAccGetNrsLeft(this, acph, i, &nrwordspre, &nrsyllspre))) {
893 PICODBG_WARN(("problem setting distances in invec"));
894 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
895 NULL, NULL);
896 okay = FALSE;
897 }
898
899 PICODBG_TRACE(("%d: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", i,
900 valbuf[0], valbuf[1], valbuf[2], valbuf[3], valbuf[4],
901 hist1, hist2, nrwordspre, nrsyllspre,
902 nrwordsfol, nrsyllsfol, footwordsfol, footsyllsfol));
903
904 /* no continue so far => accentuation needed */
905 /* construct input vector, which is set in dtacc */
906 if (!picokdt_dtACCconstructInVec(acph->dtacc, valbuf[0], valbuf[1],
907 valbuf[2], valbuf[3], valbuf[4],
908 hist1, hist2, nrwordspre, nrsyllspre,
909 nrwordsfol, nrsyllsfol, footwordsfol,
910 footsyllsfol)) {
911 /* error constructing invec */
912 PICODBG_WARN(("problem with invec"));
913 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
914 NULL, NULL);
915 okay = FALSE;
916 }
917 /* classify */
918 if (okay && (!picokdt_dtACCclassify(acph->dtacc, &prevout))) {
919 /* error doing classification */
920 PICODBG_WARN(("problem classifying"));
921 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
922 NULL, NULL);
923 okay = FALSE;
924 }
925 /* decompose */
926 if (okay && (!picokdt_dtACCdecomposeOutClass(acph->dtacc, &dtres))) {
927 /* error decomposing */
928 PICODBG_WARN(("problem decomposing"));
929 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
930 NULL, NULL);
931 okay = FALSE;
932 }
933
934 if (dtres.class > 255) {
935 PICODBG_WARN(("dt class outside valid range, setting to ACC0"));
936 dtres.class = PICODATA_ACC0;
937 }
938
939 if (okay && dtres.set) {
940 PICODBG_DEBUG(("%d - inpos: %d, out: %d", i,valbuf[2],dtres.class));
941 if (acphIsWordWithoutStress(this, acph, i)) {
942 if (dtres.class != PICODATA_ACC0) {
943 acph->headx[i].head.info2 = PICODATA_ACC3;
944 } else {
945 acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
946 }
947 } else {
948 acph->headx[i].head.info2 = (picoos_uint8)dtres.class;
949 }
950 PICODBG_DEBUG(("%d - after-nostress-corr: %d",
951 i, acph->headx[i].head.info2));
952 } else {
953 PICODBG_WARN(("problem determining accentuation level"));
954 dtres.class = PICODATA_ITEMINFO1_ERR;
955 }
956 }
957 return PICO_OK;
958 }
959
960
961
962 /* ***********************************************************************/
963 /* acphStep support functions */
964 /* ***********************************************************************/
965
/* Build a BOUND item with the given strength and type in acph->tmpbuf and
   put it into the unit's output buffer.
   *numBytesOutput is increased by the byte count reported by
   picodata_cbPutItem(). *dopuoutfull is set to TRUE only if the output
   buffer overflowed, telling the caller to do PU_OUT_FULL later.
   Returns TRUE if the item was put, FALSE otherwise; failures other than
   the overflow raise an exception on the exception manager. */
static picoos_uint8 acphPutBoundItem(register picodata_ProcessingUnit this,
                                     register acph_subobj_t *acph,
                                     const picoos_uint8 strength,
                                     const picoos_uint8 type,
                                     picoos_uint8 *dopuoutfull,
                                     picoos_uint16 *numBytesOutput) {
    picodata_itemhead_t boundhead;
    picoos_uint16 nbytes = 0;
    pico_status_t status;

    *dopuoutfull = FALSE;

    /* a BOUND item consists of a head only */
    boundhead.type = PICODATA_ITEM_BOUND;
    boundhead.info1 = strength;
    boundhead.info2 = type;
    boundhead.len = 0;

    status = picodata_put_itemparts(&boundhead, NULL, 0, acph->tmpbuf,
                                    PICODATA_MAX_ITEMSIZE, &nbytes);
    if (PICO_OK != status) {
        PICODBG_ERROR(("problem creating BOUND item"));
        picoos_emRaiseException(this->common->em, status, NULL, NULL);
        return FALSE;
    }

    /* hand the serialized item over to the external output buffer */
    status = picodata_cbPutItem(this->cbOut, acph->tmpbuf, nbytes, &nbytes);
    *numBytesOutput += nbytes;

    if (PICO_EXC_BUF_OVERFLOW == status) {
        PICODBG_DEBUG(("overflow in cb output buffer"));
        *dopuoutfull = TRUE;
        return FALSE;
    }
    if (PICO_OK != status) {
        PICODBG_ERROR(("problem putting BOUND item"));
        picoos_emRaiseException(this->common->em, status, NULL, NULL);
        return FALSE;
    }

    PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
                       (picoos_uint8 *)"acph: ", acph->tmpbuf, nbytes);

    return TRUE;
}
1009
1010
1011
1012 /* ***********************************************************************/
1013 /* acphStep function */
1014 /* ***********************************************************************/
1015
1016 /*
1017 complete phrase processed in one step, if not fast enough -> rework
1018
1019 init, collect into internal buffer, process, and then feed to
1020 output buffer
1021
1022 init state: INIT ext ext
1023 state trans: in hc1 hc2 out
1024
1025 INIT | putItem = 0 0 +1 | BUSY -> COLL (put B-SBEG item,
1026 set do-init to false)
1027
1028 inspace-ok-hc1
1029 needs-more-items-(phrase-or-flush)
1030 COLL1 |getItems -n +n 0 1 | ATOMIC -> PPHR (got items,
1031 if flush set do-init)
1032 COLL2 |getItems -n +n 1 0 | ATOMIC -> PPHR (got items, forced)
1033 COLL3 |getItems -n +n 1 1 | IDLE (got items, need more)
1034 COLL4 |getItems = = 1 1 | IDLE (got no items)
1035
1036 PPOSD | posd = ~n~n | BUSY -> PWP (posd done)
1037 PWP | lex/g2p = ~n-n 0+n | BUSY -> PPHR (lex/g2p done)
1038 PPHR | phr = -n 0 +m=n | BUSY -> PACC (phr done, m>=n)
1039 PACC | acc = 0 0 ~m=n | BUSY -> FEED (acc done)
1040
1041 doinit-flag
1042 FEED | putItems 0 0 0 -m-n +m 0 | BUSY -> COLL (put items)
1043 FEED | putItems 0 0 0 -m-n +m 1 | BUSY -> INIT (put items)
1044 FEED | putItems 0 0 0 -d-d +d | OUT_FULL (put some items)
1045 */
1046
acphStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1047 static picodata_step_result_t acphStep(register picodata_ProcessingUnit this,
1048 picoos_int16 mode,
1049 picoos_uint16 *numBytesOutput) {
1050 register acph_subobj_t *acph;
1051 pico_status_t rv = PICO_OK;
1052 pico_status_t rvP = PICO_OK;
1053 picoos_uint16 blen = 0;
1054 picoos_uint16 clen = 0;
1055 picoos_uint16 i;
1056
1057
1058 if (NULL == this || NULL == this->subObj) {
1059 return PICODATA_PU_ERROR;
1060 }
1061 acph = (acph_subobj_t *) this->subObj;
1062 mode = mode; /* avoid warning "var not used in this function"*/
1063 *numBytesOutput = 0;
1064 while (1) { /* exit via return */
1065 PICODBG_DEBUG(("doing state %i, hLen|c1Len: %d|%d",
1066 acph->procState, acph->headxLen, acph->cbufLen));
1067
1068 switch (acph->procState) {
1069
1070 /* *********************************************************/
1071 /* collect state: get item(s) from charBuf and store in
1072 * internal buffers, need a complete punctuation-phrase
1073 */
1074 case SA_STEPSTATE_COLLECT:
1075
1076 while (acph->inspaceok && acph->needsmoreitems && (PICO_OK ==
1077 (rv = picodata_cbGetItem(this->cbIn, acph->tmpbuf,
1078 PICODATA_MAX_ITEMSIZE, &blen)))) {
1079 rvP = picodata_get_itemparts(acph->tmpbuf,
1080 PICODATA_MAX_ITEMSIZE, &(acph->headx[acph->headxLen].head),
1081 &(acph->cbuf[acph->cbufLen]), acph->cbufBufSize
1082 - acph->cbufLen, &clen);
1083 if (rvP != PICO_OK) {
1084 PICODBG_ERROR(("problem getting item parts"));
1085 picoos_emRaiseException(this->common->em, rvP,
1086 NULL, NULL);
1087 return PICODATA_PU_ERROR;
1088 }
1089
1090 /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
1091 construct PUNC-FLUSH item in headx */
1092 if ((acph->headx[acph->headxLen].head.type
1093 == PICODATA_ITEM_CMD)
1094 && (acph->headx[acph->headxLen].head.info1
1095 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
1096 acph->headx[acph->headxLen].head.type
1097 = PICODATA_ITEM_PUNC;
1098 acph->headx[acph->headxLen].head.info1
1099 = PICODATA_ITEMINFO1_PUNC_FLUSH;
1100 acph->headx[acph->headxLen].head.info2
1101 = PICODATA_ITEMINFO2_PUNC_SENT_T;
1102 acph->headx[acph->headxLen].head.len = 0;
1103 }
1104
1105 /* check/set needsmoreitems */
1106 if (acph->headx[acph->headxLen].head.type
1107 == PICODATA_ITEM_PUNC) {
1108 acph->needsmoreitems = FALSE;
1109 }
1110
1111 /* check/set inspaceok, keep spare slot for forcing */
1112 if ((acph->headxLen >= (PICOACPH_MAXNR_HEADX - 2))
1113 || ((acph->cbufBufSize - acph->cbufLen)
1114 < PICODATA_MAX_ITEMSIZE)) {
1115 acph->inspaceok = FALSE;
1116 }
1117
1118 if (clen > 0) {
1119 acph->headx[acph->headxLen].cind = acph->cbufLen;
1120 acph->cbufLen += clen;
1121 } else {
1122 acph->headx[acph->headxLen].cind = 0;
1123 }
1124 acph->headxLen++;
1125 }
1126
1127 if (!acph->needsmoreitems) {
1128 /* 1, phrase buffered */
1129 acph->procState = SA_STEPSTATE_PROCESS_PHR;
1130 return PICODATA_PU_ATOMIC;
1131 } else if (!acph->inspaceok) {
1132 /* 2, forced phrase end */
1133 /* at least one slot is still free, use it to
1134 force a trailing PUNC item */
1135 acph->headx[acph->headxLen].head.type = PICODATA_ITEM_PUNC;
1136 acph->headx[acph->headxLen].head.info1 =
1137 PICODATA_ITEMINFO1_PUNC_PHRASEEND;
1138 acph->headx[acph->headxLen].head.info2 =
1139 PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
1140 acph->headx[acph->headxLen].head.len = 0;
1141 acph->needsmoreitems = FALSE; /* not really needed for now */
1142 acph->headxLen++;
1143 PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
1144 picoos_emRaiseWarning(this->common->em,
1145 PICO_WARN_FALLBACK, NULL,
1146 (picoos_char *)"forced phrase end");
1147 acph->procState = SA_STEPSTATE_PROCESS_PHR;
1148 return PICODATA_PU_ATOMIC;
1149 } else if (rv == PICO_EOF) {
1150 /* 3, 4 */
1151 return PICODATA_PU_IDLE;
1152 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
1153 (rv == PICO_EXC_BUF_OVERFLOW)) {
1154 /* error, no valid item in cb (UNDER) */
1155 /* or tmpbuf not large enough, not possible (OVER) */
1156 /* no exception raised, left for ctrl to handle */
1157 PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
1158 return PICODATA_PU_ERROR;
1159 } else {
1160 /* error, only possible if cbGetItem implementation
1161 changes without this function being adapted*/
1162 PICODBG_ERROR(("untreated return value, rv: %d", rv));
1163 return PICODATA_PU_ERROR;
1164 }
1165 break;
1166
1167
1168
1169
1170 /* *********************************************************/
1171 /* process phr state: process items in headx and modify
1172 * headx in place
1173 */
1174 case SA_STEPSTATE_PROCESS_PHR:
1175 /* ensure there is an item in inBuf */
1176 if (acph->headxLen > 0) {
1177 /* we have a phrase in headx, cbuf1 (can be
1178 single PUNC item), do phrasing and modify headx */
1179
1180 if (PICO_OK != acphSubPhrasing(this, acph)) {
1181 picoos_emRaiseException(this->common->em,
1182 PICO_ERR_OTHER, NULL, NULL);
1183 return PICODATA_PU_ERROR;
1184 }
1185 acph->procState = SA_STEPSTATE_PROCESS_ACC;
1186 } else if (acph->headxLen == 0) { /* no items in inBuf */
1187 PICODBG_WARN(("no items in inBuf"));
1188 acph->procState = SA_STEPSTATE_COLLECT;
1189 return PICODATA_PU_BUSY;
1190 }
1191
1192 #if defined (PICO_DEBUG_NOTNEEDED)
1193 if (1) {
1194 picoos_uint8 i, j, ittype;
1195 for (i = 0; i < acph->headxLen; i++) {
1196 if ((acph->headx[i].boundstrength != 0) &&
1197 (acph->headx[i].boundstrength !=
1198 PICODATA_ITEMINFO1_BOUND_PHR0)) {
1199 PICODBG_INFO(("acph-p: boundstrength '%c', "
1200 "boundtype '%c'",
1201 acph->headx[i].boundstrength,
1202 acph->headx[i].boundtype));
1203 }
1204
1205 ittype = acph->headx[i].head.type;
1206 PICODBG_INFO_CTX();
1207 PICODBG_INFO_MSG(("acph-p: ("));
1208 PICODBG_INFO_MSG(("'%c',", ittype));
1209 if ((32 <= acph->headx[i].head.info1) &&
1210 (acph->headx[i].head.info1 < 127) &&
1211 (ittype != PICODATA_ITEM_WORDPHON)) {
1212 PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info1));
1213 } else {
1214 PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info1));
1215 }
1216 if ((32 <= acph->headx[i].head.info2) &&
1217 (acph->headx[i].head.info2 < 127)) {
1218 PICODBG_INFO_MSG(("'%c',",acph->headx[i].head.info2));
1219 } else {
1220 PICODBG_INFO_MSG(("%3d,", acph->headx[i].head.info2));
1221 }
1222 PICODBG_INFO_MSG(("%3d)", acph->headx[i].head.len));
1223
1224 for (j = 0; j < acph->headx[i].head.len; j++) {
1225 if ((ittype == PICODATA_ITEM_CMD)) {
1226 PICODBG_INFO_MSG(("%c",
1227 acph->cbuf[acph->headx[i].cind+j]));
1228 } else {
1229 PICODBG_INFO_MSG(("%4d",
1230 acph->cbuf[acph->headx[i].cind+j]));
1231 }
1232 }
1233 PICODBG_INFO_MSG(("\n"));
1234 }
1235 }
1236 #endif
1237
1238 break;
1239
1240
1241 /* *********************************************************/
1242 /* process acc state: process items in headx and modify
1243 * headx in place
1244 */
1245 case SA_STEPSTATE_PROCESS_ACC:
1246 /* ensure there is an item in inBuf */
1247 if (acph->headxLen > 0) {
1248 /* we have a phrase in headx, cbuf (can be
1249 single PUNC item), do accentuation and modify headx */
1250 if (PICO_OK != acphAccentuation(this, acph)) {
1251 picoos_emRaiseException(this->common->em,
1252 PICO_ERR_OTHER, NULL, NULL);
1253 return PICODATA_PU_ERROR;
1254 }
1255 acph->procState = SA_STEPSTATE_FEED;
1256 } else if (acph->headxLen == 0) { /* no items in inBuf */
1257 PICODBG_WARN(("no items in inBuf"));
1258 acph->procState = SA_STEPSTATE_COLLECT;
1259 return PICODATA_PU_BUSY;
1260 }
1261 break;
1262
1263
1264 /* *********************************************************/
1265 /* feed state: copy item in internal outBuf to output charBuf */
1266 case SA_STEPSTATE_FEED: {
1267 picoos_uint16 indupbound;
1268 picoos_uint8 dopuoutfull;
1269
1270 PICODBG_DEBUG(("put out items (bot, len): (%d, %d)",
1271 acph->headxBottom, acph->headxLen));
1272
1273 indupbound = acph->headxBottom + acph->headxLen;
1274 dopuoutfull = FALSE;
1275
1276 if (acph->headxBottom == 0) {
1277 /* construct first BOUND item in tmpbuf and put item */
1278 /* produce BOUND unless it is followed by a term/flush) */
1279 if (acph->headx[0].head.info1
1280 != PICODATA_ITEMINFO1_PUNC_FLUSH) {
1281 if (!acphPutBoundItem(this, acph,
1282 acph->headx[0].boundstrength,
1283 acph->headx[0].boundtype, &dopuoutfull,
1284 numBytesOutput)) {
1285 if (dopuoutfull) {
1286 PICODBG_DEBUG(("feeding overflow"));
1287 return PICODATA_PU_OUT_FULL;
1288 } else {
1289 /* ERR-msg and exception done in acphPutBoundItem */
1290 return PICODATA_PU_ERROR;
1291 }
1292 }
1293 }
1294 }
1295
1296 /* for all items in headx, cbuf */
1297 for (i = acph->headxBottom; i < indupbound; i++) {
1298
1299 switch (acph->headx[i].head.type) {
1300 case PICODATA_ITEM_PUNC:
1301 /* if sentence end, put SEND bound */
1302 if ((acph->headx[i].head.info1 ==
1303 PICODATA_ITEMINFO1_PUNC_SENTEND) &&
1304 (i == (indupbound - 1))) {
1305 /* construct and put BOUND item */
1306 if (!acphPutBoundItem(this, acph,
1307 PICODATA_ITEMINFO1_BOUND_SEND,
1308 PICODATA_ITEMINFO2_NA,
1309 &dopuoutfull, numBytesOutput)) {
1310 if (dopuoutfull) {
1311 PICODBG_DEBUG(("feeding overflow"));
1312 return PICODATA_PU_OUT_FULL;
1313 } else {
1314 /* ERR-msg and exception done
1315 in acphPutBoundItem */
1316 return PICODATA_PU_ERROR;
1317 }
1318 }
1319 } else if ((acph->headx[i].head.info1 ==
1320 PICODATA_ITEMINFO1_PUNC_FLUSH) &&
1321 (i == (indupbound - 1))) {
1322 /* construct and put BOUND item */
1323 if (!acphPutBoundItem(this, acph,
1324 PICODATA_ITEMINFO1_BOUND_TERM,
1325 PICODATA_ITEMINFO2_NA,
1326 &dopuoutfull, numBytesOutput)) {
1327 if (dopuoutfull) {
1328 PICODBG_DEBUG(("feeding overflow"));
1329 return PICODATA_PU_OUT_FULL;
1330 } else {
1331 /* ERR-msg and exception done
1332 in acphPutBoundItem */
1333 return PICODATA_PU_ERROR;
1334 }
1335 }
1336 }
1337 /* else, good-bye PUNC, not needed anymore */
1338 break;
1339 default:
1340
1341 /* PHR2/3 maybe existing, check and add
1342 BOUND item now, if needed */
1343 if ((acph->headx[i].boundstrength ==
1344 PICODATA_ITEMINFO1_BOUND_PHR2) ||
1345 (acph->headx[i].boundstrength ==
1346 PICODATA_ITEMINFO1_BOUND_PHR3)) {
1347 if (!acphPutBoundItem(this, acph,
1348 acph->headx[i].boundstrength,
1349 acph->headx[i].boundtype,
1350 &dopuoutfull, numBytesOutput)) {
1351 if (dopuoutfull) {
1352 PICODBG_DEBUG(("feeding overflow"));
1353 return PICODATA_PU_OUT_FULL;
1354 } else {
1355 /* ERR-msg and exception done
1356 in acphPutBoundItem */
1357 return PICODATA_PU_ERROR;
1358 }
1359 }
1360 }
1361
1362 /* copy item unmodified */
1363 rv = picodata_put_itemparts(&(acph->headx[i].head),
1364 &(acph->cbuf[acph->headx[i].cind]),
1365 acph->headx[i].head.len,
1366 acph->tmpbuf, PICODATA_MAX_ITEMSIZE,
1367 &blen);
1368
1369 rvP = picodata_cbPutItem(this->cbOut, acph->tmpbuf,
1370 PICODATA_MAX_ITEMSIZE, &clen);
1371
1372 *numBytesOutput += clen;
1373
1374 PICODBG_DEBUG(("put item, status: %d", rvP));
1375
1376 if (rvP == PICO_OK) {
1377 acph->headxBottom++;
1378 acph->headxLen--;
1379 } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
1380 /* try again next time, but PHR2/3
1381 bound already added if existing,
1382 ensure it's not output a 2nd
1383 time */
1384 PICODBG_DEBUG(("feeding overflow"));
1385 acph->headx[i].boundstrength = 0;
1386 return PICODATA_PU_OUT_FULL;
1387 } else {
1388 /* error, should never happen */
1389 PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
1390 return PICODATA_PU_ERROR;
1391 }
1392
1393 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1394 (picoos_uint8 *)"acph: ",
1395 acph->tmpbuf, PICODATA_MAX_ITEMSIZE);
1396
1397 break;
1398 } /*switch*/
1399 } /*for*/
1400
1401 /* reset headx, cbuf */
1402 acph->headxBottom = 0;
1403 acph->headxLen = 0;
1404 acph->cbufLen = 0;
1405 for (i = 0; i < PICOACPH_MAXNR_HEADX; i++) {
1406 acph->headx[i].boundstrength = 0;
1407 }
1408
1409 /* reset collect state support variables */
1410 acph->inspaceok = TRUE;
1411 acph->needsmoreitems = TRUE;
1412
1413 acph->procState = SA_STEPSTATE_COLLECT;
1414 return PICODATA_PU_BUSY;
1415 break;
1416 }
1417
1418 default:
1419 break;
1420 } /* switch */
1421
1422 } /* while */
1423
1424 /* should be never reached */
1425 PICODBG_ERROR(("reached end of function"));
1426 picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
1427 return PICODATA_PU_ERROR;
1428 }
1429
1430 #ifdef __cplusplus
1431 }
1432 #endif
1433
1434
1435 /* end */
1436