1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picotok.c
18 *
19 * tokenizer
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29
30 /* ************************************************************/
31 /* tokenisation and markup handling */
32 /* ************************************************************/
33
34 /** @addtogroup picotok
35 @b tokenisation_overview
36
37 markup handling overview:
38
39 The following markups are recognized
40 - ignore
41 - speed
42 - pitch
43 - volume
44 - voice
45 - preproccontext
46 - mark
47 - play
48 - usesig
49 - genfile
50 - sentence
51 - s
52 - paragraph
53 - p
54 - break
55 - spell (pauses between letter)
56 - phoneme
57
58 All markups which are recognized but are not yet implemented in pico
59 system have the mark.
60 */
61
62
63 #include "picodefs.h"
64 #include "picoos.h"
65 #include "picobase.h"
66 #include "picodbg.h"
67 #include "picodata.h"
68 #include "picotok.h"
69 #include "picoktab.h"
70
71 #ifdef __cplusplus
72 extern "C" {
73 #endif
74 #if 0
75 }
76 #endif
77
78 /* *****************************************************************************/
79
/* Size in bytes of the token and file-name buffers of the tokenizer. */
#define IN_BUF_SIZE   255
/* Size in bytes of the internal output buffer (tok_subobj_t.outBuf).
   The replacement list is parenthesized so that the sum stays a single
   operand wherever the macro is expanded (e.g. next to '*' or a cast). */
#define OUT_BUF_SIZE  (IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3)
82
83 #define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5)
84 #define MAX_NR_MARKUP_PARAMS 6
85 #define MARKUP_HANDLING_DISABLED 0
86 #define MARKUP_HANDLING_ENABLED 1
87 #define EOL '\n'
88
89
90 typedef picoos_int8 pico_tokenSubType;
91 typedef picoos_uint8 pico_tokenType;
92
93 /** @todo : consider adding these specialized exception codes: */
94
95 #define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER
96 #define PICO_ERR_INVALID_MARKUP_TAG PICO_ERR_OTHER
97 #define PICO_ERR_INTERNAL_LIMIT PICO_ERR_OTHER
98
99 typedef enum {MIDummyStart, MIIgnore,
100 MIPitch, MISpeed, MIVolume,
101 MIVoice, MIPreprocContext, MIMarker,
102 MIPlay, MIUseSig, MIGenFile, MIParagraph,
103 MISentence, MIBreak, MISpell, MIPhoneme, MIItem, MISpeaker, MIDummyEnd
104 } MarkupId;
105 typedef enum {MSNotInMarkup, MSGotStart, MSExpectingmarkupTagName, MSInmarkupTagName,
106 MSGotmarkupTagName, MSInAttrName, MSGotAttrName, MSGotEqual, MSInAttrValue,
107 MSInAttrValueEscaped, MSGotAttrValue, MSGotEndSlash, MSGotEnd,
108 MSError, MSErrorTooLong, MSErrorSyntax
109 } MarkupState;
110 typedef enum {MENone, MEMissingStart, MEUnknownTag, MEIdent, MEMissingEqual,
111 MEMissingQuote, MEMissingEnd, MEUnexpectedChar, MEInterprete
112 } MarkupParseError;
113
114 typedef enum {MTNone, MTStart, MTEnd, MTEmpty} MarkupTagType;
115
116 #define UTF_CHAR_COMPLETE 2
117 #define UTF_CHAR_INCOMPLETE 1
118 #define UTF_CHAR_MALFORMED 0
119
120 #define TOK_MARKUP_KW_IGNORE (picoos_uchar*)"ignore"
121 #define TOK_MARKUP_KW_SPEED (picoos_uchar*)"speed"
122 #define TOK_MARKUP_KW_PITCH (picoos_uchar*)"pitch"
123 #define TOK_MARKUP_KW_VOLUME (picoos_uchar*)"volume"
124 #define TOK_MARKUP_KW_VOICE (picoos_uchar*)"voice"
125 #define TOK_MARKUP_KW_CONTEXT (picoos_uchar*)"preproccontext"
126 #define TOK_MARKUP_KW_MARK (picoos_uchar*)"mark"
127 #define TOK_MARKUP_KW_PLAY (picoos_uchar*)"play"
128 #define TOK_MARKUP_KW_USESIG (picoos_uchar*)"usesig"
129 #define TOK_MARKUP_KW_GENFILE (picoos_uchar*)"genfile"
130 #define TOK_MARKUP_KW_SENTENCE (picoos_uchar*)"sentence"
131 #define TOK_MARKUP_KW_S (picoos_uchar*)"s"
132 #define TOK_MARKUP_KW_PARAGRAPH (picoos_uchar*)"paragraph"
133 #define TOK_MARKUP_KW_P (picoos_uchar*)"p"
134 #define TOK_MARKUP_KW_BREAK (picoos_uchar*)"break"
135 #define TOK_MARKUP_KW_SPELL (picoos_uchar*)"spell"
136 #define TOK_MARKUP_KW_PHONEME (picoos_uchar*)"phoneme"
137 #define TOK_MARKUP_KW_ITEM (picoos_uchar*)"item"
138 #define TOK_MARKUP_KW_SPEAKER (picoos_uchar*)"speaker"
139
140 #define KWLevel (picoos_uchar *)"level"
141 #define KWName (picoos_uchar *)"name"
142 #define KWProsDomain (picoos_uchar *)"prosodydomain"
143 #define KWTime (picoos_uchar *)"time"
144 #define KWMode (picoos_uchar *)"mode"
145 #define KWSB (picoos_uchar *)"sb"
146 #define KWPB (picoos_uchar *)"pb"
147 #define KWFile (picoos_uchar *)"file"
148 #define KWType (picoos_uchar *)"type"
149 #define KWF0Beg (picoos_uchar *)"f0beg"
150 #define KWF0End (picoos_uchar *)"f0end"
151 #define KWXFadeBeg (picoos_uchar *)"xfadebeg"
152 #define KWXFadeEnd (picoos_uchar *)"xfadeend"
153 #define KWAlphabet (picoos_uchar *)"alphabet"
154 #define KWPH (picoos_uchar *)"ph"
155 #define KWOrthMode (picoos_uchar *)"orthmode"
156 #define KWIgnorePunct (picoos_uchar *)"ignorepunct"
157 #define KWInfo1 (picoos_uchar *)"info1"
158 #define KWInfo2 (picoos_uchar *)"info2"
159 #define KWDATA (picoos_uchar *)"data"
160
161 #define PICO_SPEED_MIN 20
162 #define PICO_SPEED_MAX 500
163 #define PICO_SPEED_DEFAULT 100
164 #define PICO_SPEED_FACTOR_MIN 500
165 #define PICO_SPEED_FACTOR_MAX 2000
166
167 #define PICO_PITCH_MIN 50
168 #define PICO_PITCH_MAX 200
169 #define PICO_PITCH_DEFAULT 100
170 #define PICO_PITCH_FACTOR_MIN 500
171 #define PICO_PITCH_FACTOR_MAX 2000
172 #define PICO_PITCH_ADD_MIN -100
173 #define PICO_PITCH_ADD_MAX 100
174 #define PICO_PITCH_ADD_DEFAULT 0
175
176 #define PICO_VOLUME_MIN 0
177 #define PICO_VOLUME_MAX 500
178 #define PICO_VOLUME_DEFAULT 100
179 #define PICO_VOLUME_FACTOR_MIN 500
180 #define PICO_VOLUME_FACTOR_MAX 2000
181
182 #define PICO_SPEAKER_MIN 20
183 #define PICO_SPEAKER_MAX 180
184 #define PICO_SPEAKER_DEFAULT 100
185 #define PICO_SPEAKER_FACTOR_MIN 500
186 #define PICO_SPEAKER_FACTOR_MAX 2000
187
188 #define PICO_CONTEXT_DEFAULT (picoos_uchar*)"DEFAULT"
189
190 #define PARAGRAPH_PAUSE_DUR 500
191 #define SPELL_WITH_PHRASE_BREAK 1
192 #define SPELL_WITH_SENTENCE_BREAK 2
193
194 /* *****************************************************************************/
195
196 #define TOK_PUNC_FLUSH (picoos_char) '\0'
197
198 typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE];
199
200
201 struct MarkupParam {
202 Word paramId;
203 Word paramVal;
204 };
205
206 typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS];
207
208
/** subobject : TokenizeUnit
 *  shortcut  : tok
 *
 *  Working state of the tokenizer. The per-field notes below only state
 *  what the functions of this file visibly do with the field.
 */
typedef struct tok_subobj
{
    /* nesting depth of "ignore" sections: incremented by tok_startIgnore,
       decremented (never below 0) by tok_endIgnore; while > 0, tok_putItem
       drops every item except the FLUSH command */
    picoos_int32 ignLevel;

    /* bytes of the UTF-8 character currently collected by tok_putToUtf */
    picoos_uchar utf[5];
    /* index in 'utf' at which tok_putToUtf stores the next byte */
    picoos_int32 utfpos;
    /* length obtained from picobase_det_utf8_length for the first byte;
       reset to 0 by tok_putToUtf on a non-continuation follow-up byte */
    picoos_int32 utflen;

    /* attribute id/value pairs read by tok_interpretMarkup and the
       tok_getParam* helpers */
    MarkupParams markupParams;
    picoos_int32 nrMarkupParams;
    MarkupState markupState;
    picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE];
    picoos_int32 markupPos;
    picoos_int32 markupLevel[MIDummyEnd+1];
    picoos_uchar markupTagName[IN_BUF_SIZE];
    MarkupTagType markupTagType;
    MarkupParseError markupTagErr;

    picoos_int32 strPos;
    picoos_uchar strDelim;
    picoos_bool isFileAttr;

    pico_tokenType tokenType;
    pico_tokenSubType tokenSubType;

    picoos_int32 tokenPos;
    picoos_uchar tokenStr[IN_BUF_SIZE];

    picoos_int32 nrEOL;

    picoos_bool markupHandlingMode;     /* to be moved ??? */
    picoos_bool aborted;                /* to be moved ??? */

    picoos_bool start;

    picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */
    picoos_uint16 outReadPos; /* next pos to read from outBuf */
    picoos_uint16 outWritePos; /* next pos to write to outBuf */

    /* file name of the last SAVE command emitted for a genfile start tag;
       set to the empty string again once the matching UNSAVE is emitted */
    picoos_uchar saveFile[IN_BUF_SIZE];
    Word phonemes;

    picotrns_SimpleTransducer transducer;

    /* kbs */

    picoktab_Graphs graphTab;
    picokfst_FST xsampa_parser;
    picokfst_FST svoxpa_parser;
    picokfst_FST xsampa2svoxpa_mapper;



} tok_subobj_t;
266
267 /* *****************************************************************************/
268
269 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
270 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
271 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
272 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
273 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
274 static MarkupId tok_markupTagId (picoos_uchar tagId[]);
275
276 /* *****************************************************************************/
277
tok_strEqual(picoos_uchar * str1,picoos_uchar * str2)278 static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2)
279 {
280 return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0);
281 }
282
/* Normalizes the blanks of 'str' in place: leading and trailing blanks
   are dropped and every run of blanks inside the string is replaced by
   exactly one blank. Only the character ' ' is treated as a blank. */
static void tok_reduceBlanks(picoos_uchar * str)
{
    picoos_uchar * src = str;   /* next character to read */
    picoos_uchar * dst = str;   /* next position to write (dst <= src) */

    while (*src != 0) {
        if (*src != (picoos_uchar)' ') {
            *dst++ = *src++;
            continue;
        }
        /* a run of blanks: keep one blank unless nothing was kept so far */
        if (dst != str) {
            *dst++ = (picoos_uchar)' ';
        }
        do {
            src++;
        } while (*src == (picoos_uchar)' ');
    }

    /* a run of blanks at the very end left one blank behind; drop it */
    if ((dst != str) && (dst[-1] == ' ')) {
        dst--;
    }
    *dst = 0;
}
315
316
/* Enters one more level of "ignore" nesting. */
static void tok_startIgnore (tok_subobj_t * tok)
{
    tok->ignLevel += 1;
}
321
322
/* Leaves one level of "ignore" nesting; the level never drops below 0. */
static void tok_endIgnore (tok_subobj_t * tok)
{
    if (tok->ignLevel <= 0) {
        return;
    }
    tok->ignLevel -= 1;
}
329
330
/* Looks up the first entry of 'params' whose id equals 'paramId'.
   Found:     *paramVal = picoos_atoi() of the entry's value string,
              *paramFound = TRUE.
   Not found: *paramVal = -1, *paramFound = FALSE. */
static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound)
{
    int idx;

    for (idx = 0; idx < MAX_NR_MARKUP_PARAMS; idx++) {
        if (tok_strEqual(paramId, params[idx].paramId)) {
            *paramVal = picoos_atoi((picoos_char*)params[idx].paramVal);
            *paramFound = TRUE;
            return;
        }
    }
    *paramVal = -1;
    *paramFound = FALSE;
}
346
347
348
/* Looks up the first entry of 'params' whose id equals 'paramId'.
   Found:     the entry's value string is copied to 'paramStrVal',
              *paramFound = TRUE.
   Not found: 'paramStrVal' becomes the empty string, *paramFound = FALSE.

   NOTE(review): the copy uses picoos_strcpy without a length limit, so
   'paramStrVal' presumably has to be large enough for a complete
   parameter value (a 'Word') -- confirm against the callers' buffers. */
static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound)
{
    int idx;

    for (idx = 0; idx < MAX_NR_MARKUP_PARAMS; idx++) {
        if (tok_strEqual(paramId, params[idx].paramId)) {
            picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[idx].paramVal);
            *paramFound = TRUE;
            return;
        }
    }
    paramStrVal[0] = 0;
    *paramFound = FALSE;
}
364
365
/* Looks up the first entry of 'params' whose id equals 'paramId' and
   reports in *paramFound whether such an entry exists.
   The entry's value is copied to 'phones' (at most 'phoneslen' bytes as
   limited by picoos_strlcpy) only if the entry exists and 'alphabet' is
   either PICODATA_XSAMPA or the empty string; in every other case
   'phones' becomes the empty string. */
static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound)
{
    int idx;
    picoos_bool found = FALSE;

    for (idx = 0; idx < MAX_NR_MARKUP_PARAMS; idx++) {
        if (tok_strEqual(paramId, params[idx].paramId)) {
            found = TRUE;
            break;
        }
    }

    if (found
        && (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)""))) {
        picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[idx].paramVal, phoneslen);
    } else {
        /* unknown parameter or unsupported alphabet */
        phones[0] = 0;
    }
    (*paramFound) = found;
}
392
393
/* Sets the id and the value of all MAX_NR_MARKUP_PARAMS entries of
   'params' to the empty string. */
static void tok_clearMarkupParams (MarkupParams params)
{
    struct MarkupParam * entry = params;
    struct MarkupParam * const end = params + MAX_NR_MARKUP_PARAMS;

    while (entry != end) {
        entry->paramId[0] = 0;
        entry->paramVal[0] = 0;
        entry++;
    }
}
403
404
/* Parses a duration string of the form "<digits>s" or "<digits>ms"
   (blanks around the number and the unit are tolerated).

   durStr : duration text, not modified
   dur    : receives the duration in milliseconds, or 0 on failure
   done   : receives TRUE if the unit was "s" or "ms", FALSE otherwise

   The digits are read from the blank-normalized, length-limited copy
   'tmpWord' rather than from 'durStr', so that
   - the index used for reading and for blanking out digits always refers
     to the same string (also when 'durStr' starts with blanks), and
   - the loop cannot write beyond 'tmpWord' when 'durStr' is longer than
     IN_BUF_SIZE.
   The number saturates instead of overflowing; the saturation value is
   chosen such that the conversion from seconds to milliseconds still
   fits into 32 bits. */
static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done)
{
    const picoos_uint32 maxNum = 0xFFFFFFFFu / 1000u;
    picoos_uint32 num = 0;
    picoos_uint32 digit;
    int i = 0;
    picoos_uchar tmpWord[IN_BUF_SIZE];

    picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord));
    tok_reduceBlanks(tmpWord);
    /* tmpWord is 0-terminated, which also terminates this loop */
    while ((tmpWord[i] >= '0') && (tmpWord[i] <= '9')) {
        digit = (picoos_uint32)(tmpWord[i] - '0');
        if (num > (maxNum - digit) / 10) {
            num = maxNum;
        } else {
            num = 10 * num + digit;
        }
        /* blank out the consumed digit so that only the unit remains */
        tmpWord[i] = ' ';
        i++;
    }
    tok_reduceBlanks(tmpWord);
    if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) {
        (*dur) = (1000 * num);
        (*done) = TRUE;
    } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) {
        (*dur) = num;
        (*done) = TRUE;
    } else {
        (*dur) = 0;
        (*done) = FALSE;
    }
}
431
432
tok_putToUtf(tok_subobj_t * tok,picoos_uchar ch)433 static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
434 {
435 if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
436 tok->utf[tok->utfpos] = ch;
437 if (tok->utfpos == 0) {
438 tok->utflen = picobase_det_utf8_length(ch);
439 } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) {
440 tok->utflen = 0;
441 }
442 (tok->utfpos)++;
443 if ((tok->utfpos == tok->utflen)) {
444 if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) {
445 tok->utf[tok->utfpos] = 0;
446 }
447 return UTF_CHAR_COMPLETE;
448 } else if (tok->utfpos < tok->utflen) {
449 return UTF_CHAR_INCOMPLETE;
450 } else {
451 return UTF_CHAR_MALFORMED;
452 }
453 } else {
454 return UTF_CHAR_MALFORMED;
455 }
456 }
457
458
tok_isRelative(picoos_uchar strval[],picoos_uint32 * val)459 static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val)
460 {
461 picoos_int32 len;
462 picoos_bool rel;
463
464 rel = FALSE;
465 len = picoos_strlen((picoos_char*)strval);
466 if (len > 0) {
467 if (strval[len - 1] == '%') {
468 strval[len - 1] = 0;
469 if ((strval[0] == '+') || (strval[0] == '-')) {
470 (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10);
471 } else {
472 (*val) = picoos_atoi((picoos_char*)strval) * 10;
473 }
474 rel = TRUE;
475 }
476 }
477 return rel;
478 }
479
480
tok_putItem(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_uint8 itemType,picoos_uint8 info1,picoos_uint8 info2,picoos_uint16 val,picoos_uchar str[])481 static void tok_putItem (picodata_ProcessingUnit this, tok_subobj_t * tok,
482 picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2,
483 picoos_uint16 val,
484 picoos_uchar str[])
485 {
486 picoos_int32 len, i;
487
488 if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
489 tok->outBuf[tok->outWritePos++] = itemType;
490 tok->outBuf[tok->outWritePos++] = info1;
491 tok->outBuf[tok->outWritePos++] = info2;
492 tok->outBuf[tok->outWritePos++] = 0;
493 }
494 else if (tok->ignLevel <= 0) {
495 switch (itemType) {
496 case PICODATA_ITEM_CMD:
497 switch (info1) {
498 case PICODATA_ITEMINFO1_CMD_CONTEXT:
499 case PICODATA_ITEMINFO1_CMD_VOICE:
500 case PICODATA_ITEMINFO1_CMD_MARKER:
501 case PICODATA_ITEMINFO1_CMD_PLAY:
502 case PICODATA_ITEMINFO1_CMD_SAVE:
503 case PICODATA_ITEMINFO1_CMD_UNSAVE:
504 case PICODATA_ITEMINFO1_CMD_PROSDOMAIN:
505 case PICODATA_ITEMINFO1_CMD_PHONEME:
506 len = picoos_strlen((picoos_char*)str);
507 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
508 tok->outBuf[tok->outWritePos++] = itemType;
509 tok->outBuf[tok->outWritePos++] = info1;
510 tok->outBuf[tok->outWritePos++] = info2;
511 tok->outBuf[tok->outWritePos++] = len;
512 for (i=0; i<len; i++) {
513 tok->outBuf[tok->outWritePos++] = str[i];
514 }
515 }
516 else {
517 PICODBG_WARN(("tok_putItem: output buffer too small"));
518 }
519 break;
520 case PICODATA_ITEMINFO1_CMD_IGNSIG:
521 case PICODATA_ITEMINFO1_CMD_IGNORE:
522 if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
523 tok->outBuf[tok->outWritePos++] = itemType;
524 tok->outBuf[tok->outWritePos++] = info1;
525 tok->outBuf[tok->outWritePos++] = info2;
526 tok->outBuf[tok->outWritePos++] = 0;
527 }
528 else {
529 PICODBG_WARN(("tok_putItem: output buffer too small"));
530 }
531 break;
532 case PICODATA_ITEMINFO1_CMD_SPEED:
533 case PICODATA_ITEMINFO1_CMD_PITCH:
534 case PICODATA_ITEMINFO1_CMD_VOLUME:
535 case PICODATA_ITEMINFO1_CMD_SPELL:
536 case PICODATA_ITEMINFO1_CMD_SIL:
537 case PICODATA_ITEMINFO1_CMD_SPEAKER:
538 if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
539 tok->outBuf[tok->outWritePos++] = itemType;
540 tok->outBuf[tok->outWritePos++] = info1;
541 tok->outBuf[tok->outWritePos++] = info2;
542 tok->outBuf[tok->outWritePos++] = 2;
543 tok->outBuf[tok->outWritePos++] = val % 256;
544 tok->outBuf[tok->outWritePos++] = val / 256;
545 }
546 else {
547 PICODBG_WARN(("tok_putItem: output buffer too small"));
548 }
549 break;
550 default:
551 PICODBG_WARN(("tok_putItem: unknown command type"));
552 }
553 break;
554 case PICODATA_ITEM_TOKEN:
555 len = picoos_strlen((picoos_char*)str);
556 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
557 tok->outBuf[tok->outWritePos++] = itemType;
558 tok->outBuf[tok->outWritePos++] = info1;
559 tok->outBuf[tok->outWritePos++] = info2;
560 tok->outBuf[tok->outWritePos++] = len;
561 for (i=0; i<len; i++) {
562 tok->outBuf[tok->outWritePos++] = str[i];
563 }
564 }
565 else {
566 PICODBG_WARN(("tok_putItem: output buffer too small"));
567 }
568 break;
569 default:
570 PICODBG_WARN(("tok_putItem: unknown item type"));
571 }
572 }
573 }
574
575
/* Writes an item given as raw head fields and payload bytes to the
   internal output buffer tok->outBuf.

   this  : processing unit (not used here)
   tok   : tokenizer state owning the output buffer
   type  : item type; nothing is written unless is_valid_itemtype(type)
   info1 : stored unchanged in the item head
   info2 : stored unchanged in the item head
   len   : number of payload bytes in 'data'
   data  : payload bytes

   Unlike tok_putItem, the ignore level is not consulted.
   The item is dropped with a debug warning if it does not fit into the
   remaining space of the buffer (same rule as in tok_putItem: at least
   one byte of the buffer stays unused), instead of writing beyond the
   end of tok->outBuf. */
static void tok_putItem2 (picodata_ProcessingUnit this, tok_subobj_t * tok,
                          picoos_uint8 type,
                          picoos_uint8 info1, picoos_uint8 info2,
                          picoos_uint8 len,
                          picoos_uint8 data[])
{
    picoos_int32 i;

    if (!is_valid_itemtype(type)) {
        return;
    }
    if ((picoos_int32)tok->outWritePos + 4 + (picoos_int32)len >= (picoos_int32)sizeof(tok->outBuf)) {
        PICODBG_WARN(("tok_putItem2: output buffer too small"));
        return;
    }
    tok->outBuf[tok->outWritePos++] = type;
    tok->outBuf[tok->outWritePos++] = info1;
    tok->outBuf[tok->outWritePos++] = info2;
    tok->outBuf[tok->outWritePos++] = len;
    for (i=0; i<len; i++) {
        tok->outBuf[tok->outWritePos++] = data[i];
    }
}
594
595
tok_markupTagId(picoos_uchar tagId[])596 static MarkupId tok_markupTagId (picoos_uchar tagId[])
597 {
598 if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) {
599 tagId+=5;
600 }
601 if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) {
602 return MIIgnore;
603 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) {
604 return MISpeed;
605 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) {
606 return MIPitch;
607 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) {
608 return MIVolume;
609 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) {
610 return MISpeaker;
611 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) {
612 return MIVoice;
613 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) {
614 return MIPreprocContext;
615 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) {
616 return MIMarker;
617 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) {
618 return MIPlay;
619 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) {
620 return MIUseSig;
621 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) {
622 return MIGenFile;
623 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) {
624 return MISentence;
625 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) {
626 return MIParagraph;
627 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) {
628 return MIBreak;
629 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) {
630 return MISpell;
631 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) {
632 return MIPhoneme;
633 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) {
634 return MIItem;
635 } else {
636 return MIDummyEnd;
637 }
638 }
639
640
/* Forces *value into the range [min, max].

   If *value is outside of the range, a warning naming the offending
   value and 'valueType' is raised on this->common->em and *value is then
   replaced by the violated limit; a value inside of the range is left
   untouched. */
extern void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[])
{
    const picoos_bool tooSmall = ((*value) < min);
    const picoos_bool tooLarge = ((*value) > max);

    if (!tooSmall && !tooLarge) {
        return;
    }
    /* report the original value before it is corrected */
    picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType);
    (*value) = tooSmall ? min : max;
}
652
653
654
/* Floating point variant of tok_checkLimits: forces *value into the
   range [min, max].

   If *value is outside of the range, a warning naming the offending
   value and 'valueType' is raised on this->common->em and *value is then
   replaced by the violated limit; a value inside of the range is left
   untouched. */
extern void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[])
{
    const picoos_bool tooSmall = ((*value) < min);
    const picoos_bool tooLarge = ((*value) > max);

    if (!tooSmall && !tooLarge) {
        return;
    }
    /* report the original value before it is corrected */
    picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType);
    (*value) = tooSmall ? min : max;
}
666
667
668 #define VAL_STR_LEN 21
669
tok_interpretMarkup(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_bool isStartTag,MarkupId mId)670 static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
671 {
672 picoos_bool done;
673 picoos_int32 ival;
674 picoos_uint32 uval;
675 picoos_int32 ival2;
676 picoos_uchar valStr[VAL_STR_LEN];
677 picoos_uchar valStr2[VAL_STR_LEN];
678 picoos_uchar valStr3[VAL_STR_LEN];
679 picoos_int32 i2;
680 picoos_uint32 dur;
681 picoos_bool done1;
682 picoos_bool paramFound;
683 picoos_uint8 type, info1, info2;
684 picoos_uint8 data[256];
685 picoos_int32 pos, n, len;
686 picoos_uchar part[10];
687
688 done = FALSE;
689 switch (mId) {
690 case MIIgnore:
691 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
692 tok_startIgnore(tok);
693 done = TRUE;
694 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
695 tok_endIgnore(tok);
696 done = TRUE;
697 }
698 break;
699 case MISpeed:
700 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
701 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
702 tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor");
703 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
704 } else {
705 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
706 tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed");
707 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
708 }
709 done = TRUE;
710 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
711 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
712 done = TRUE;
713 }
714 break;
715 case MIPitch:
716 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
717 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
718 tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor");
719 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
720 } else {
721 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
722 tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch");
723 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
724 }
725 done = TRUE;
726 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
727 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
728 done = TRUE;
729 }
730 break;
731 case MIVolume:
732 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
733 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
734 tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor");
735 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
736 } else {
737 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
738 tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume");
739 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
740 }
741 done = TRUE;
742 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
743 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
744 done = TRUE;
745 }
746 break;
747 case MISpeaker:
748 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
749 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
750 tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor");
751 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
752 } else {
753 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
754 tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume");
755 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
756 }
757 done = TRUE;
758 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
759 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
760 done = TRUE;
761 }
762 break;
763
764 case MIVoice:
765 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
766 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
767 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
768 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
769 done = TRUE;
770 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
771 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
772 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
773 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
774 done = TRUE;
775 }
776 break;
777 case MIPreprocContext:
778 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
779 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
780 done = TRUE;
781 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
782 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
783 done = TRUE;
784 }
785 break;
786 case MIMarker:
787 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
788 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
789 done = TRUE;
790 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
791 done = TRUE;
792 }
793 break;
794 case MISentence:
795 if (isStartTag) {
796 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
797 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
798 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
799 done = TRUE;
800 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
801 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
802 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
803 done = TRUE;
804 }
805 break;
806 case MIParagraph:
807 if (isStartTag) {
808 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
809 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
810 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
811 done = TRUE;
812 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
813 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
814 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
815 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
816 done = TRUE;
817 }
818 break;
819 case MIBreak:
820 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
821 tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
822 tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time");
823 if (done1) {
824 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
825 done = TRUE;
826 }
827 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
828 done = TRUE;
829 }
830 break;
831 case MISpell:
832 if (isStartTag) {
833 if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
834 if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
835 uval = SPELL_WITH_PHRASE_BREAK;
836 } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
837 uval = SPELL_WITH_SENTENCE_BREAK;
838 } else {
839 tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
840 tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time");
841 if (done1) {
842 done = TRUE;
843 }
844 }
845 } else {
846 uval = SPELL_WITH_PHRASE_BREAK;
847 }
848 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)"");
849 done = TRUE;
850 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
851 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
852 done = TRUE;
853 }
854 break;
855 case MIGenFile:
856 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
857 if (tok->saveFile[0] != 0) {
858 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
859 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
860 tok->saveFile[0] = 0;
861 }
862 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
863 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/FALSE), 0, tok->markupParams[0].paramVal);
864 picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
865 done = TRUE;
866 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
867 if (tok->saveFile[0] != 0) {
868 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
869 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
870 tok->saveFile[0] = 0;
871 }
872 done = TRUE;
873 }
874 break;
875 case MIPlay:
876 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
877 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
878 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
879 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
880 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
881 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
882 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
883 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
884 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
885 tok_startIgnore(tok);
886 } else {
887 if (tok->ignLevel > 0) {
888 tok_startIgnore(tok);
889 } else {
890 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
891 }
892 }
893 done = TRUE;
894 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
895 tok_endIgnore(tok);
896 done = TRUE;
897 }
898 break;
899 case MIUseSig:
900 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
901 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
902 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
903 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
904 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
905 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
906 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
907 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
908 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
909 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
910 } else {
911 if (tok->ignLevel <= 0) {
912 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
913 }
914 }
915 done = TRUE;
916 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
917 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
918 done = TRUE;
919 }
920 break;
921 case MIPhoneme:
922 i2 = 0;
923 if (isStartTag) {
924 if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
925 if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
926 && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
927 i2 = 1;
928 }
929 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
930 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
931 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
932 done = TRUE;
933 } else {
934 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
935 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
936 done = TRUE;
937 }
938 } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
939 if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
940 && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
941 i2 = 1;
942 }
943 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
944 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
945 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
946 done = TRUE;
947 }
948 else {
949 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
950 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
951 done = TRUE;
952 }
953 }
954 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
955 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
956 PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)"");
957 done = TRUE;
958 }
959 break;
960 case MIItem:
961 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
962 tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
963 tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
964 tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
965 picoos_int32 len2, n2;
966 type = picoos_atoi(tok->markupParams[0].paramVal);
967 info1 = picoos_atoi(tok->markupParams[1].paramVal);
968 info2 = picoos_atoi(tok->markupParams[2].paramVal);
969 n = 0; n2 = 0;
970 len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
971 while (n<len2) {
972 while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
973 n++;
974 }
975 tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
976 n++;
977 n2++;
978 }
979 if (is_valid_itemtype(type)) {
980 done = TRUE;
981 len = 0;
982 pos = 0;
983 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
984 &pos, ',', part, 10, &done1);
985 while (done && done1) {
986 n = picoos_atoi(part);
987 if ((n>=0) && (n<256) && (len<256)) {
988 data[len++] = n;
989 }
990 else {
991 done = FALSE;
992 }
993 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
994 &pos, ',', part, 10, &done1);
995 }
996 if (done) {
997 tok_putItem2(this, tok, type, info1, info2, len, data);
998 }
999 }
1000 else {
1001 done = FALSE;
1002 }
1003 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
1004 done = TRUE;
1005 }
1006 break;
1007 default:
1008 break;
1009 }
1010 if (!done) {
1011 tok->markupTagErr = MEInterprete;
1012 }
1013 if (isStartTag) {
1014 tok->markupLevel[mId]++;
1015 } else if ((tok->markupLevel[mId] > 0)) {
1016 tok->markupLevel[mId]--;
1017 }
1018 }
1019
1020
/* Tells whether ch is acceptable in a markup attribute name.
   ASCII letters are always accepted; ASCII digits are accepted only when
   'first' is false, i.e. not at the first position of the name. */
static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first)
{
    const picoos_bool isUpper = (ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z');
    const picoos_bool isLower = (ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z');
    const picoos_bool isDigit = (ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9');

    return (isUpper || isLower) || (!first && isDigit);
}
1027
1028
1029
/* Tells whether ch is acceptable in a markup tag name: everything that
   tok_attrChar accepts, plus ':' at any position but the first. */
static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first)
{
    if (tok_attrChar(ch, first)) {
        return TRUE;
    }
    return !first && (ch == (picoos_uchar)':');
}
1034
1035
/* Stores in *isFile whether the attribute name equals the KWFile keyword
   (as decided by tok_strEqual). */
static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile)
{
    const picoos_bool namesFile = tok_strEqual(name, KWFile);

    *isFile = namesFile;
}
1040
1041 /* *****************************************************************************/
1042
/* Appends the bytes of str to the simple token being collected in
   tok->tokenStr and records 'type' and 'subtype' as the token's current type.

   At most IN_BUF_SIZE-1 bytes are collected, so that tok_treatSimpleToken
   always finds room for the terminating NUL (previously the buffer could be
   filled with IN_BUF_SIZE bytes and was then handed on unterminated).
   When the limit is hit a warning is raised and the token collected so far
   is emitted via tok_treatSimpleToken. A piece that would fit into an empty
   buffer is never split: the flush happens before its first byte, so the
   bytes of one UTF-8 character stay within a single token.

   An empty str only updates the token type and subtype. */
static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
{
    /* one byte of the token buffer is reserved for the terminating NUL */
    const int maxLen = IN_BUF_SIZE - 1;
    int i, len;

    if (str[0] != 0) {
        len = picoos_strlen((picoos_char*)str);
        for (i = 0; i < len; i++) {
            /* flush when the buffer is full, or - before the first byte -
               when a piece that fits an empty buffer would not fit behind
               what has been collected so far */
            if ((tok->tokenPos >= maxLen) ||
                ((i == 0) && (tok->tokenPos > 0) && (len <= maxLen) && ((tok->tokenPos + len) > maxLen))) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment");
                tok_treatSimpleToken(this, tok);
            }
            tok->tokenStr[tok->tokenPos] = str[i];
            tok->tokenPos++;
        }
    }
    tok->tokenType = type;
    tok->tokenSubType = subtype;
}
1061
1062
tok_putToMarkup(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_uchar str[])1063 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
1064 {
1065 picoos_int32 i, len;
1066 picoos_uint8 ok;
1067
1068 tok->markupTagErr = MENone;
1069 len = picoos_strlen((picoos_char*)str);
1070 for (i = 0; i< len; i++) {
1071 if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) {
1072 if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1073 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long");
1074 }
1075 tok->markupState = MSErrorTooLong;
1076 } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
1077 } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1078 tok->markupState = MSGotEnd;
1079 } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1080 if (tok->markupTagType == MTEnd) {
1081 tok->markupTagErr = MEUnexpectedChar;
1082 tok->markupState = MSError;
1083 } else {
1084 tok->markupTagType = MTEmpty;
1085 tok->markupState = MSGotEndSlash;
1086 }
1087 } else {
1088 switch (tok->markupState) {
1089 case MSNotInMarkup:
1090 if (str[i] == (picoos_uchar)'<') {
1091 tok_clearMarkupParams(tok->markupParams);
1092 tok->nrMarkupParams = 0;
1093 tok->strPos = 0;
1094 tok->markupTagType = MTStart;
1095 tok->markupState = MSGotStart;
1096 } else {
1097 tok->markupTagErr = MEMissingStart;
1098 tok->markupState = MSError;
1099 }
1100 break;
1101 case MSGotStart:
1102 if (str[i] == (picoos_uchar)'/') {
1103 tok->markupTagType = MTEnd;
1104 tok->markupState = MSExpectingmarkupTagName;
1105 } else if (str[i] == (picoos_uchar)' ') {
1106 tok->markupState = MSExpectingmarkupTagName;
1107 } else if (tok_idChar(str[i],TRUE)) {
1108 tok->markupTagType = MTStart;
1109 tok->markupTagName[tok->strPos] = str[i];
1110 tok->strPos++;
1111 tok->markupTagName[tok->strPos] = 0;
1112 tok->markupState = MSInmarkupTagName;
1113 } else {
1114 tok->markupTagErr = MEUnexpectedChar;
1115 tok->markupState = MSError;
1116 }
1117 break;
1118 case MSInmarkupTagName: case MSExpectingmarkupTagName:
1119 if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
1120 tok->markupTagName[tok->strPos] = str[i];
1121 tok->strPos++;
1122 tok->markupTagName[(tok->strPos)] = 0;
1123 tok->markupState = MSInmarkupTagName;
1124 } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
1125 tok->markupState = MSGotmarkupTagName;
1126 picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
1127 tok->strPos = 0;
1128 } else {
1129 tok->markupTagErr = MEIdent;
1130 tok->markupState = MSError;
1131 }
1132 break;
1133 case MSGotmarkupTagName: case MSGotAttrValue:
1134 if (tok_attrChar(str[i], TRUE)) {
1135 if (tok->markupTagType == MTEnd) {
1136 tok->markupTagErr = MEUnexpectedChar;
1137 tok->markupState = MSError;
1138 } else {
1139 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1140 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1141 tok->strPos++;
1142 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1143 } else {
1144 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring");
1145 }
1146 tok->markupState = MSInAttrName;
1147 }
1148 } else {
1149 tok->markupTagErr = MEUnexpectedChar;
1150 tok->markupState = MSError;
1151 }
1152 break;
1153 case MSInAttrName:
1154 if (tok_attrChar(str[i], FALSE)) {
1155 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1156 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1157 tok->strPos++;
1158 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1159 }
1160 tok->markupState = MSInAttrName;
1161 } else if (str[i] == (picoos_uchar)' ') {
1162 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1163 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1164 tok->markupState = MSGotAttrName;
1165 } else if (str[i] == (picoos_uchar)'=') {
1166 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1167 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1168 tok->markupState = MSGotEqual;
1169 } else {
1170 tok->markupTagErr = MEMissingEqual;
1171 tok->markupState = MSError;
1172 }
1173 break;
1174 case MSGotAttrName:
1175 if (str[i] == (picoos_uchar)'=') {
1176 tok->markupState = MSGotEqual;
1177 } else {
1178 tok->markupTagErr = MEMissingEqual;
1179 tok->markupState = MSError;
1180 }
1181 break;
1182 case MSGotEqual:
1183 if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) {
1184 tok->strDelim = str[i];
1185 tok->strPos = 0;
1186 tok->markupState = MSInAttrValue;
1187 } else {
1188 tok->markupTagErr = MEMissingQuote;
1189 tok->markupState = MSError;
1190 }
1191 break;
1192 case MSInAttrValue:
1193 if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
1194 tok->markupState = MSInAttrValueEscaped;
1195 } else if (str[i] == tok->strDelim) {
1196 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1197 tok->nrMarkupParams++;
1198 }
1199 tok->strPos = 0;
1200 tok->markupState = MSGotAttrValue;
1201 } else {
1202 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1203 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1204 tok->strPos++;
1205 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1206 }
1207 tok->markupState = MSInAttrValue;
1208 }
1209 break;
1210 case MSInAttrValueEscaped:
1211 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1212 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1213 tok->strPos++;
1214 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1215 }
1216 tok->markupState = MSInAttrValue;
1217 break;
1218 case MSGotEndSlash:
1219 if (str[i] == (picoos_uchar)'>') {
1220 tok->markupState = MSGotEnd;
1221 } else {
1222 tok->markupTagErr = MEUnexpectedChar;
1223 tok->markupState = MSError;
1224 }
1225 break;
1226 default:
1227 tok->markupTagErr = MEUnexpectedChar;
1228 tok->markupState = MSError;
1229 break;
1230 }
1231 }
1232 tok->markupStr[tok->markupPos] = str[i];
1233 tok->markupPos++;
1234 tok->markupStr[tok->markupPos] = 0;
1235 }
1236 /*
1237 PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
1238 */
1239 }
1240
1241 /* *****************************************************************************/
1242
/* Re-reads the collected markup text (tok->markupStr) as ordinary input:
   the parser is put back to MSNotInMarkup, the UTF-8 assembly state is
   cleared and every stored byte is passed to tok_treatChar with markup
   handling switched off. Afterwards the markup buffer is empty. */
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    picoos_int32 idx = 0;

    tok->markupState = MSNotInMarkup;
    tok->utflen = 0;
    tok->utfpos = 0;

    while (idx < tok->markupPos) {
        tok_treatChar(this, tok, tok->markupStr[idx], FALSE);
        idx++;
    }

    tok->strPos = 0;
    tok->markupPos = 0;
}
1256
1257
/* Handles a markup tag that has been read completely.

   If tok_markupTagId reports MIDummyEnd for the tag name, the tag text is
   re-read as ordinary input. Otherwise, provided no error is pending, any
   simple token in progress (other than space/undefined) is emitted, a
   single space is queued, and the tag is interpreted: as a start tag for
   MTStart/MTEmpty and - after clearing the attributes - as an end tag for
   MTEnd/MTEmpty. If an error is pending or was raised by the
   interpretation, a warning is issued (unless aborted) and the tag text is
   re-read as ordinary input.

   In all cases the markup parser is reset when the function returns. */
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    MarkupId tagId;

    if (tok_markupTagId(tok->markupTagName) == MIDummyEnd) {
        tok_treatMarkupAsSimpleToken(this, tok);
    } else {
        if (tok->markupTagErr == MENone) {
            tok->markupState = MSNotInMarkup;
            if (!((tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED))) {
                tok_treatSimpleToken(this, tok);
            }
            tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);
            tagId = tok_markupTagId(tok->markupTagName);

            /* an empty tag (<x/>) is interpreted as start immediately followed by end */
            if ((tok->markupTagType == MTEmpty) || (tok->markupTagType == MTStart)) {
                tok_interpretMarkup(this, tok, TRUE, tagId);
            }
            if ((tok->markupTagType == MTEmpty) || (tok->markupTagType == MTEnd)) {
                tok_clearMarkupParams(tok->markupParams);
                tok->nrMarkupParams = 0;
                tok_interpretMarkup(this, tok, FALSE, tagId);
            }
        }
        if (tok->markupTagErr != MENone) {
            if (!tok->aborted) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
            }
            tok_treatMarkupAsSimpleToken(this, tok);
        }
    }

    tok->strPos = 0;
    tok->markupPos = 0;
    tok->markupState = MSNotInMarkup;
}
1292
1293
1294
/* Consumes one input byte.

   NULLC ends the text: the token in progress is emitted, followed by a
   FLUSH command item. Any other byte is handed to tok_putToUtf; nothing
   more happens until that reports a complete UTF-8 character (a malformed
   sequence is dropped). A complete character is classified through the
   graph table and then either
   - passed to the markup parser (markup handling active and a tag is in
     progress, or the character is '<'), or
   - added to the simple token in progress, emitting the previous token
     first when the token type/subtype changes, or
   - used only as a token boundary when its type is undefined.
   'markupHandling' is only honoured while tok->markupHandlingMode equals
   MARKUP_HANDLING_ENABLED. */
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
{
    picoos_int32 graphId;
    picoos_uint8 rawType;
    pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    pico_tokenSubType subtype = -1;

    if (ch == NULLC) {
        tok_treatSimpleToken(this, tok);
        tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
        return;
    }

    switch (tok_putToUtf(tok, ch)) {
        case UTF_CHAR_INCOMPLETE:
            /* wait for the remaining bytes of the character */
            break;
        case UTF_CHAR_MALFORMED:
            tok->utfpos = 0;
            tok->utflen = 0;
            break;
        case UTF_CHAR_COMPLETE:
            markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));

            /* classify the character */
            graphId = picoktab_graphOffset(tok->graphTab, tok->utf);
            if (graphId > 0) {
                if (picoktab_getIntPropTokenType(tok->graphTab, graphId, &rawType)) {
                    type = (pico_tokenType)rawType;
                    if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) {
                        type = PICODATA_ITEMINFO1_TOKTYPE_LETTER;
                    }
                }
                /* result deliberately ignored; subtype keeps its value if not set */
                (void) picoktab_getIntPropTokenSubType(tok->graphTab, graphId, &subtype);
            } else if (ch <= (picoos_uchar)' ') {
                type = PICODATA_ITEMINFO1_TOKTYPE_SPACE;
                subtype = -1;
            } else {
                type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
                subtype = -1;
            }

            /* count consecutive line ends; any byte above ' ' resets the count */
            if (ch > (picoos_uchar)' ') {
                tok->nrEOL = 0;
            } else if (ch == EOL) {
                tok->nrEOL++;
            }

            if (markupHandling && (tok->markupState != MSNotInMarkup)) {
                /* inside a tag */
                tok_putToMarkup(this, tok, tok->utf);
                if (tok->markupState >= MSError) {
                    tok_treatMarkupAsSimpleToken(this, tok);
                } else if (tok->markupState == MSGotEnd) {
                    tok_treatMarkup(this, tok);
                }
            } else if (markupHandling && (ch == (picoos_uchar)'<')) {
                /* possible start of a tag */
                tok_putToMarkup(this, tok, tok->utf);
            } else if (type == PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) {
                tok_treatSimpleToken(this, tok);
            } else {
                if (!((type == tok->tokenType) && (type != PICODATA_ITEMINFO1_TOKTYPE_CHAR) && (subtype == tok->tokenSubType))) {
                    tok_treatSimpleToken(this, tok);
                } else if ((ch == EOL) && (tok->nrEOL == 2)) {
                    /* second line end in a row: insert a "." token */
                    tok_treatSimpleToken(this, tok);
                    tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
                    tok_treatSimpleToken(this, tok);
                }
                tok_putToSimpleToken(this, tok, tok->utf, type, subtype);
            }
            tok->utfpos = 0;
            tok->utflen = 0;
            break;
    }
}
1365
1366
/* Finishes the simple token collected in tok->tokenStr.

   The token string is always NUL-terminated before it is used. Should the
   buffer have been filled up to IN_BUF_SIZE bytes, the last byte is given
   up for the terminator (previously no terminator was written in that
   case although the string is handed to tok_putItem).

   If a markup tag is still being parsed, the tag text is re-read as
   ordinary input (after an "unfinished markup tag" warning for tags whose
   name is complete and for which tok_markupTagId does not report
   MIDummyEnd), and the resulting token is finished as well. Otherwise a
   non-empty token is emitted as a TOKEN item, unless text is currently
   ignored (tok->ignLevel > 0) and the token is not a space token.

   The token state is reset on return. */
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    if (tok->tokenPos >= IN_BUF_SIZE) {
        tok->tokenPos = IN_BUF_SIZE - 1;
    }
    tok->tokenStr[tok->tokenPos] = 0;

    if (tok->markupState != MSNotInMarkup) {
        if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
            picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
        }
        tok_treatMarkupAsSimpleToken(this, tok);
        tok_treatSimpleToken(this, tok);
    } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) {
        tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
    }
    tok->tokenPos = 0;
    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    tok->tokenSubType = -1;
}
1385
1386 /* *****************************************************************************/
1387
tokReset(register picodata_ProcessingUnit this,picoos_int32 r_mode)1388 static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 r_mode)
1389 {
1390 tok_subobj_t * tok;
1391 MarkupId mId;
1392
1393 if (NULL == this || NULL == this->subObj) {
1394 return PICO_ERR_OTHER;
1395 }
1396 tok = (tok_subobj_t *) this->subObj;
1397
1398 tok->ignLevel = 0;
1399
1400 tok->utfpos = 0;
1401 tok->utflen = 0;
1402
1403 tok_clearMarkupParams(tok->markupParams);
1404 tok->nrMarkupParams = 0;
1405 tok->markupState = MSNotInMarkup;
1406 tok->markupPos = 0;
1407 for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) {
1408 tok->markupLevel[mId] = 0;
1409 }
1410 tok->markupTagName[0] = 0;
1411 tok->markupTagType = MTNone;
1412 tok->markupTagErr = MENone;
1413
1414 tok->strPos = 0;
1415 tok->strDelim = 0;
1416 tok->isFileAttr = FALSE;
1417
1418 tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1419 tok->tokenSubType = -1;
1420 tok->tokenPos = 0;
1421
1422 tok->nrEOL = 0;
1423
1424
1425 tok->markupHandlingMode = TRUE;
1426 tok->aborted = FALSE;
1427
1428 tok->start = TRUE;
1429
1430 tok->outReadPos = 0;
1431 tok->outWritePos = 0;
1432
1433 tok->saveFile[0] = 0;
1434
1435
1436 tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
1437
1438 tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
1439 PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));
1440
1441 tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
1442 PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));
1443
1444 tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
1445 PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));
1446
1447
1448
1449 return PICO_OK;
1450 }
1451
tokInitialize(register picodata_ProcessingUnit this,picoos_int32 r_mode)1452 static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 r_mode)
1453 {
1454 /*
1455
1456 tok_subobj_t * tok;
1457
1458 if (NULL == this || NULL == this->subObj) {
1459 return PICO_ERR_OTHER;
1460 }
1461 tok = (tok_subobj_t *) this->subObj;
1462 */
1463 return tokReset(this, r_mode);
1464 }
1465
1466
tokTerminate(register picodata_ProcessingUnit this)1467 static pico_status_t tokTerminate(register picodata_ProcessingUnit this)
1468 {
1469 return PICO_OK;
1470 }
1471
/* forward declaration: tokStep is defined further down, but
   picotok_newTokenizeUnit needs it to install the 'step' callback */
static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput);
1473
tokSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)1474 static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this,
1475 picoos_MemoryManager mm)
1476 {
1477
1478 if (NULL != this) {
1479 picoos_deallocate(this->common->mm, (void *) &this->subObj);
1480 }
1481 mm = mm; /* avoid warning "var not used in this function"*/
1482 return PICO_OK;
1483 }
1484
picotok_newTokenizeUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)1485 picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common,
1486 picodata_CharBuffer cbIn, picodata_CharBuffer cbOut,
1487 picorsrc_Voice voice)
1488 {
1489 tok_subobj_t * tok;
1490 picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
1491 if (this == NULL) {
1492 return NULL;
1493 }
1494 this->initialize = tokInitialize;
1495 PICODBG_DEBUG(("set this->step to tokStep"));
1496 this->step = tokStep;
1497 this->terminate = tokTerminate;
1498 this->subDeallocate = tokSubObjDeallocate;
1499 this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t));
1500 if (this->subObj == NULL) {
1501 picoos_deallocate(mm, (void *)&this);
1502 return NULL;
1503 }
1504 tok = (tok_subobj_t *) this->subObj;
1505 tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
1506 if (NULL == tok->transducer) {
1507 tokSubObjDeallocate(this,mm);
1508 picoos_deallocate(mm, (void *)&this);
1509 return NULL;
1510 }
1511 tokInitialize(this, PICO_RESET_FULL);
1512 return this;
1513 }
1514
1515 /**
1516 * fill up internal buffer, try to locate token, write token to output
1517 */
tokStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1518 picodata_step_result_t tokStep(register picodata_ProcessingUnit this,
1519 picoos_int16 mode, picoos_uint16 * numBytesOutput)
1520 {
1521 register tok_subobj_t * tok;
1522
1523 if (NULL == this || NULL == this->subObj) {
1524 return PICODATA_PU_ERROR;
1525 }
1526 tok = (tok_subobj_t *) this->subObj;
1527
1528 mode = mode; /* avoid warning "var not used in this function"*/
1529
1530 *numBytesOutput = 0;
1531 while (1) { /* exit via return */
1532 picoos_int16 ch;
1533
1534 if ((tok->outWritePos - tok->outReadPos) > 0) {
1535 if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
1536 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1537 (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
1538 tok->outReadPos += *numBytesOutput;
1539 if (tok->outWritePos == tok->outReadPos) {
1540 tok->outWritePos = 0;
1541 tok->outReadPos = 0;
1542 }
1543 }
1544 else {
1545 return PICODATA_PU_OUT_FULL;
1546 }
1547
1548 }
1549 else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) {
1550 PICODBG_DEBUG(("read in %c", (picoos_char) ch));
1551 tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);
1552 }
1553 else {
1554 return PICODATA_PU_IDLE;
1555 }
1556 }
1557 }
1558
1559 #ifdef __cplusplus
1560 }
1561 #endif
1562
1563 /* end */
1564