• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /**
17  * @file picotok.c
18  *
19  * tokenizer
20  *
21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22  * All rights reserved.
23  *
24  * History:
25  * - 2009-04-20 -- initial version
26  *
27  */
28 
29 
30 /* ************************************************************/
31 /* tokenisation and markup handling */
32 /* ************************************************************/
33 
34 /** @addtogroup picotok
35   @b tokenisation_overview
36 
37   markup handling overview:
38 
39   The following markups are recognized
40      - ignore
41      - speed
42      - pitch
43      - volume
44      - voice
45      - preproccontext
46      - mark
47      - play
48      - usesig
49      - genfile
50      - sentence
51      - s
52      - paragraph
53      - p
54      - break
55      - spell            (pauses between letters)
56      - phoneme
57 
58   All markups which are recognized but are not yet implemented in pico
59   system have the mark.
60 */
61 
62 
63 #include "picodefs.h"
64 #include "picoos.h"
65 #include "picobase.h"
66 #include "picodbg.h"
67 #include "picodata.h"
68 #include "picotok.h"
69 #include "picoktab.h"
70 
71 #ifdef __cplusplus
72 extern "C" {
73 #endif
74 #if 0
75 }
76 #endif
77 
78 /* *****************************************************************************/
79 
/* Size limits of the tokenizer's working buffers. */
#define IN_BUF_SIZE   255
/* Fully parenthesized so that the sum keeps its value when the macro is
   embedded in a larger expression (e.g. 2 * OUT_BUF_SIZE). */
#define OUT_BUF_SIZE  (IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3)

/* Capacity of one markup string / parameter string (see typedef Word). */
#define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5)
/* Number of parameter slots kept per markup tag. */
#define MAX_NR_MARKUP_PARAMS 6
#define MARKUP_HANDLING_DISABLED  0
#define MARKUP_HANDLING_ENABLED 1
#define EOL '\n'
88 
89 
90 typedef picoos_int8 pico_tokenSubType;
91 typedef picoos_uint8 pico_tokenType;
92 
93 /** @todo : consider adding these specialized exception codes: */
94 
95 #define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER
96 #define PICO_ERR_INVALID_MARKUP_TAG        PICO_ERR_OTHER
97 #define PICO_ERR_INTERNAL_LIMIT            PICO_ERR_OTHER
98 
/* Identifier of a markup tag. tok_markupTagId() maps a tag name to one of
   these and returns MIDummyEnd for a name it does not know; MIDummyEnd + 1
   is also used to size the per-tag markupLevel array. */
typedef enum {
    MIDummyStart,
    MIIgnore,
    MIPitch,
    MISpeed,
    MIVolume,
    MIVoice,
    MIPreprocContext,
    MIMarker,
    MIPlay,
    MIUseSig,
    MIGenFile,
    MIParagraph,
    MISentence,
    MIBreak,
    MISpell,
    MIPhoneme,
    MIItem,
    MISpeaker,
    MIDummyEnd
} MarkupId;

/* State of the markup parser (type of the markupState field). */
typedef enum {
    MSNotInMarkup,
    MSGotStart,
    MSExpectingmarkupTagName,
    MSInmarkupTagName,
    MSGotmarkupTagName,
    MSInAttrName,
    MSGotAttrName,
    MSGotEqual,
    MSInAttrValue,
    MSInAttrValueEscaped,
    MSGotAttrValue,
    MSGotEndSlash,
    MSGotEnd,
    MSError,
    MSErrorTooLong,
    MSErrorSyntax
} MarkupState;

/* Kind of error recorded for a markup tag (type of the markupTagErr field). */
typedef enum {
    MENone,
    MEMissingStart,
    MEUnknownTag,
    MEIdent,
    MEMissingEqual,
    MEMissingQuote,
    MEMissingEnd,
    MEUnexpectedChar,
    MEInterprete
} MarkupParseError;

/* Form of a markup tag (type of the markupTagType field). */
typedef enum {
    MTNone,
    MTStart,
    MTEnd,
    MTEmpty
} MarkupTagType;
115 
/* Result codes of tok_putToUtf(). */
#define UTF_CHAR_MALFORMED  0   /* byte sequence is not valid             */
#define UTF_CHAR_INCOMPLETE 1   /* more bytes are needed                  */
#define UTF_CHAR_COMPLETE   2   /* a full character has been collected    */
119 
/* Markup tag names recognized by tok_markupTagId().
   Every expansion is wrapped in parentheses: a bare cast expression binds
   less tightly than postfix operators, so e.g. KEYWORD[0] would otherwise
   cast the first character instead of indexing the string. */
#define TOK_MARKUP_KW_IGNORE     ((picoos_uchar*)"ignore")
#define TOK_MARKUP_KW_SPEED      ((picoos_uchar*)"speed")
#define TOK_MARKUP_KW_PITCH      ((picoos_uchar*)"pitch")
#define TOK_MARKUP_KW_VOLUME     ((picoos_uchar*)"volume")
#define TOK_MARKUP_KW_VOICE      ((picoos_uchar*)"voice")
#define TOK_MARKUP_KW_CONTEXT    ((picoos_uchar*)"preproccontext")
#define TOK_MARKUP_KW_MARK       ((picoos_uchar*)"mark")
#define TOK_MARKUP_KW_PLAY       ((picoos_uchar*)"play")
#define TOK_MARKUP_KW_USESIG     ((picoos_uchar*)"usesig")
#define TOK_MARKUP_KW_GENFILE    ((picoos_uchar*)"genfile")
#define TOK_MARKUP_KW_SENTENCE   ((picoos_uchar*)"sentence")
#define TOK_MARKUP_KW_S          ((picoos_uchar*)"s")
#define TOK_MARKUP_KW_PARAGRAPH  ((picoos_uchar*)"paragraph")
#define TOK_MARKUP_KW_P          ((picoos_uchar*)"p")
#define TOK_MARKUP_KW_BREAK      ((picoos_uchar*)"break")
#define TOK_MARKUP_KW_SPELL      ((picoos_uchar*)"spell")
#define TOK_MARKUP_KW_PHONEME    ((picoos_uchar*)"phoneme")
#define TOK_MARKUP_KW_ITEM       ((picoos_uchar*)"item")
#define TOK_MARKUP_KW_SPEAKER    ((picoos_uchar*)"speaker")

/* Names of markup parameters (attributes) as compared against the
   paramId of the parsed markup parameters. */
#define KWLevel       ((picoos_uchar *)"level")
#define KWName        ((picoos_uchar *)"name")
#define KWProsDomain  ((picoos_uchar *)"prosodydomain")
#define KWTime        ((picoos_uchar *)"time")
#define KWMode        ((picoos_uchar *)"mode")
#define KWSB          ((picoos_uchar *)"sb")
#define KWPB          ((picoos_uchar *)"pb")
#define KWFile        ((picoos_uchar *)"file")
#define KWType        ((picoos_uchar *)"type")
#define KWF0Beg       ((picoos_uchar *)"f0beg")
#define KWF0End       ((picoos_uchar *)"f0end")
#define KWXFadeBeg    ((picoos_uchar *)"xfadebeg")
#define KWXFadeEnd    ((picoos_uchar *)"xfadeend")
#define KWAlphabet    ((picoos_uchar *)"alphabet")
#define KWPH          ((picoos_uchar *)"ph")
#define KWOrthMode    ((picoos_uchar *)"orthmode")
#define KWIgnorePunct ((picoos_uchar *)"ignorepunct")
#define KWInfo1       ((picoos_uchar *)"info1")
#define KWInfo2       ((picoos_uchar *)"info2")
#define KWDATA        ((picoos_uchar *)"data")
160 
/* Value ranges and defaults for the prosody markups. The *_FACTOR_* limits
   apply to relative (percentage) values, which tok_isRelative() scales by
   ten, so that 100% corresponds to 1000. */
#define PICO_SPEED_MIN           20
#define PICO_SPEED_MAX          500
#define PICO_SPEED_DEFAULT      100
#define PICO_SPEED_FACTOR_MIN   500
#define PICO_SPEED_FACTOR_MAX  2000

#define PICO_PITCH_MIN           50
#define PICO_PITCH_MAX          200
#define PICO_PITCH_DEFAULT      100
#define PICO_PITCH_FACTOR_MIN   500
#define PICO_PITCH_FACTOR_MAX  2000
/* negative constant is parenthesized like any other non-atomic expansion */
#define PICO_PITCH_ADD_MIN    (-100)
#define PICO_PITCH_ADD_MAX      100
#define PICO_PITCH_ADD_DEFAULT    0

#define PICO_VOLUME_MIN           0
#define PICO_VOLUME_MAX         500
#define PICO_VOLUME_DEFAULT     100
#define PICO_VOLUME_FACTOR_MIN  500
#define PICO_VOLUME_FACTOR_MAX 2000

#define PICO_SPEAKER_MIN          20
#define PICO_SPEAKER_MAX         180
#define PICO_SPEAKER_DEFAULT     100
#define PICO_SPEAKER_FACTOR_MIN  500
#define PICO_SPEAKER_FACTOR_MAX 2000

/* Context name emitted when a preproccontext markup is closed. */
#define PICO_CONTEXT_DEFAULT   ((picoos_uchar*)"DEFAULT")

#define PARAGRAPH_PAUSE_DUR 500
#define SPELL_WITH_PHRASE_BREAK  1
#define SPELL_WITH_SENTENCE_BREAK  2
193 
194 /* *****************************************************************************/
195 
196 #define TOK_PUNC_FLUSH  (picoos_char) '\0'
197 
198 typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE];
199 
200 
201 struct MarkupParam {
202     Word paramId;
203     Word paramVal;
204 };
205 
206 typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS];
207 
208 
209 /** subobject : TokenizeUnit
210  *  shortcut  : tok
211  */
212 typedef struct tok_subobj
213 {
214     picoos_int32 ignLevel;
215 
216     picoos_uchar utf[5];
217     picoos_int32 utfpos;
218     picoos_int32 utflen;
219 
220     MarkupParams markupParams;
221     picoos_int32 nrMarkupParams;
222     MarkupState markupState;
223     picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE];
224     picoos_int32 markupPos;
225     picoos_int32 markupLevel[MIDummyEnd+1];
226     picoos_uchar markupTagName[IN_BUF_SIZE];
227     MarkupTagType markupTagType;
228     MarkupParseError markupTagErr;
229 
230     picoos_int32 strPos;
231     picoos_uchar strDelim;
232     picoos_bool isFileAttr;
233 
234     pico_tokenType tokenType;
235     pico_tokenSubType tokenSubType;
236 
237     picoos_int32 tokenPos;
238     picoos_uchar tokenStr[IN_BUF_SIZE];
239 
240     picoos_int32 nrEOL;
241 
242     picoos_bool markupHandlingMode;       /* to be moved ??? */
243     picoos_bool aborted;                  /* to be moved ??? */
244 
245     picoos_bool start;
246 
247     picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */
248     picoos_uint16 outReadPos; /* next pos to read from outBuf */
249     picoos_uint16 outWritePos; /* next pos to write to outBuf */
250 
251     picoos_uchar saveFile[IN_BUF_SIZE];
252     Word phonemes;
253 
254     picotrns_SimpleTransducer transducer;
255 
256     /* kbs */
257 
258     picoktab_Graphs graphTab;
259     picokfst_FST xsampa_parser;
260     picokfst_FST svoxpa_parser;
261     picokfst_FST xsampa2svoxpa_mapper;
262 
263 
264 
265 } tok_subobj_t;
266 
267 /* *****************************************************************************/
268 
269 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
270 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
271 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
272 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
273 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
274 static MarkupId tok_markupTagId (picoos_uchar tagId[]);
275 
276 /* *****************************************************************************/
277 
tok_strEqual(picoos_uchar * str1,picoos_uchar * str2)278 static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2)
279 {
280    return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0);
281 }
282 
/**
 * Normalizes the blanks of a string in place: leading and trailing blanks
 * are removed and every inner run of blanks is collapsed to a single one.
 * Only the space character counts as a blank.
 * @param str  NUL-terminated string, modified in place
 */
static void tok_reduceBlanks(picoos_uchar * str)
{
    const picoos_uchar blank = (picoos_uchar)' ';
    picoos_uchar * src = str;   /* read cursor */
    picoos_uchar * dst = str;   /* write cursor, never ahead of src */

    while (*src != 0) {
        if (*src != blank) {
            *dst++ = *src++;
            continue;
        }
        /* a run of blanks: keep one, unless nothing has been written yet */
        if (dst != str) {
            *dst++ = blank;
        }
        do {
            src++;
        } while (*src == blank);
    }

    /* a run at the very end left one blank behind; drop it */
    if ((dst != str) && (dst[-1] == ' ')) {
        dst--;
    }
    *dst = 0;
}
315 
316 
/**
 * Enters one more level of "ignore".
 * @param tok  tokenizer state
 */
static void tok_startIgnore (tok_subobj_t * tok)
{
    tok->ignLevel += 1;
}
321 
322 
/**
 * Leaves one level of "ignore"; does nothing if no level is open.
 * @param tok  tokenizer state
 */
static void tok_endIgnore (tok_subobj_t * tok)
{
    if (tok->ignLevel <= 0) {
        return;
    }
    tok->ignLevel -= 1;
}
329 
330 
/**
 * Looks up a markup parameter by name and converts its value with
 * picoos_atoi().
 * @param params      parameter list of the current markup
 * @param paramId     name of the parameter to look for
 * @param paramVal    receives the converted value, or -1 if not found
 * @param paramFound  receives TRUE if the parameter exists, else FALSE
 */
static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound)
{
    int slot;

    for (slot = 0; slot < MAX_NR_MARKUP_PARAMS; slot++) {
        if (tok_strEqual(paramId, params[slot].paramId)) {
            (*paramVal) = picoos_atoi((picoos_char*)params[slot].paramVal);
            (*paramFound) = TRUE;
            return;
        }
    }
    (*paramVal) = -1;
    (*paramFound) = FALSE;
}
346 
347 
348 
/**
 * Looks up a markup parameter by name and copies its value string.
 *
 * NOTE(review): the copy is unbounded (picoos_strcpy) while a stored value
 * can be up to MARKUP_STRING_BUF_SIZE bytes long; paramStrVal must be large
 * enough for that. Fixing this here would need a length argument.
 *
 * @param params       parameter list of the current markup
 * @param paramId      name of the parameter to look for
 * @param paramStrVal  receives the value, or the empty string if not found
 * @param paramFound   receives TRUE if the parameter exists, else FALSE
 */
static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound)
{
    int slot;

    for (slot = 0; slot < MAX_NR_MARKUP_PARAMS; slot++) {
        if (tok_strEqual(paramId, params[slot].paramId)) {
            picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[slot].paramVal);
            (*paramFound) = TRUE;
            return;
        }
    }
    paramStrVal[0] = 0;
    (*paramFound) = FALSE;
}
364 
365 
/**
 * Looks up a markup parameter holding a phoneme string.
 * The value is copied only if the alphabet is PICODATA_XSAMPA or empty;
 * for any other alphabet, or if the parameter is missing, phones is set to
 * the empty string.
 * @param params      parameter list of the current markup
 * @param paramId     name of the parameter to look for
 * @param alphabet    name of the alphabet the phonemes are written in
 * @param phones      receives the phoneme string
 * @param phoneslen   size passed to picoos_strlcpy() for phones
 * @param paramFound  receives TRUE if the parameter exists, else FALSE
 */
static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound)
{
    int slot = 0;

    while ((slot < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[slot].paramId)) {
        slot++;
    }

    if (slot >= MAX_NR_MARKUP_PARAMS) {
        /* no such parameter */
        (*paramFound) = FALSE;
        phones[0] = 0;
        return;
    }

    (*paramFound) = TRUE;
    if (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)"")) {
        picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[slot].paramVal, phoneslen);
    } else {
        /* alphabet not handled here */
        phones[0] = 0;
    }
}
392 
393 
/**
 * Empties every slot of a parameter list by truncating both the name and
 * the value string to length zero.
 * @param params  parameter list to reset
 */
static void tok_clearMarkupParams (MarkupParams params)
{
    struct MarkupParam * slot = params;
    struct MarkupParam * const end = params + MAX_NR_MARKUP_PARAMS;

    while (slot < end) {
        slot->paramId[0] = 0;
        slot->paramVal[0] = 0;
        slot++;
    }
}
403 
404 
/**
 * Parses a duration of the form "<digits>s" or "<digits>ms" (blanks around
 * and between number and unit are tolerated) into milliseconds.
 *
 * The digits are read from the blank-reduced working copy, not from the
 * caller's string, so that the characters inspected and the characters
 * overwritten are always the same ones; this makes leading blanks harmless
 * and keeps all accesses inside the local buffer. Values that do not fit
 * into *dur are rejected instead of wrapping around.
 *
 * @param durStr  NUL-terminated duration string (not modified)
 * @param dur     receives the duration in ms, or 0 on failure
 * @param done    receives TRUE if durStr was understood, else FALSE
 */
static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done)
{
    const picoos_uint32 maxVal = (picoos_uint32)-1;
    picoos_uint32 num = 0;
    picoos_uint32 digit;
    picoos_bool overflow = FALSE;
    int i = 0;
    picoos_uchar tmpWord[IN_BUF_SIZE];

    /* assumes picoos_strlcpy always NUL-terminates within the given size,
       like BSD strlcpy -- TODO confirm */
    picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord));
    tok_reduceBlanks(tmpWord);

    /* consume the leading number, blanking it out so only the unit remains */
    while ((tmpWord[i] >= '0') && (tmpWord[i] <= '9')) {
        digit = (picoos_uint32)(tmpWord[i] - '0');
        if (num > (maxVal - digit) / 10) {
            overflow = TRUE;
        } else {
            num = 10 * num + digit;
        }
        tmpWord[i] = ' ';
        i++;
    }
    tok_reduceBlanks(tmpWord);

    (*dur) = 0;
    (*done) = FALSE;
    if (overflow) {
        return;
    }
    if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) {
        if (num <= maxVal / 1000) {
            (*dur) = (1000 * num);
            (*done) = TRUE;
        }
    } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) {
        (*dur) = num;
        (*done) = TRUE;
    }
}
431 
432 
tok_putToUtf(tok_subobj_t * tok,picoos_uchar ch)433 static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
434 {
435     if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
436         tok->utf[tok->utfpos] = ch;
437         if (tok->utfpos == 0) {
438             tok->utflen = picobase_det_utf8_length(ch);
439         } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) {
440             tok->utflen = 0;
441         }
442         (tok->utfpos)++;
443         if ((tok->utfpos == tok->utflen)) {
444             if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) {
445                 tok->utf[tok->utfpos] = 0;
446             }
447             return UTF_CHAR_COMPLETE;
448         } else if (tok->utfpos < tok->utflen) {
449             return UTF_CHAR_INCOMPLETE;
450         } else {
451             return UTF_CHAR_MALFORMED;
452         }
453     } else {
454         return UTF_CHAR_MALFORMED;
455     }
456 }
457 
458 
tok_isRelative(picoos_uchar strval[],picoos_uint32 * val)459 static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val)
460 {
461     picoos_int32 len;
462     picoos_bool rel;
463 
464     rel = FALSE;
465     len = picoos_strlen((picoos_char*)strval);
466     if (len > 0) {
467         if (strval[len - 1] == '%') {
468             strval[len - 1] = 0;
469             if ((strval[0] == '+') || (strval[0] == '-')) {
470                 (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10);
471             } else {
472                 (*val) = picoos_atoi((picoos_char*)strval) * 10;
473             }
474             rel = TRUE;
475         }
476     }
477     return rel;
478 }
479 
480 
tok_putItem(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_uint8 itemType,picoos_uint8 info1,picoos_uint8 info2,picoos_uint16 val,picoos_uchar str[])481 static void tok_putItem (picodata_ProcessingUnit this,  tok_subobj_t * tok,
482                          picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2,
483                          picoos_uint16 val,
484                          picoos_uchar str[])
485 {
486     picoos_int32 len, i;
487 
488     if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
489         tok->outBuf[tok->outWritePos++] = itemType;
490         tok->outBuf[tok->outWritePos++] = info1;
491         tok->outBuf[tok->outWritePos++] = info2;
492         tok->outBuf[tok->outWritePos++] = 0;
493     }
494     else if (tok->ignLevel <= 0) {
495         switch (itemType) {
496         case PICODATA_ITEM_CMD:
497             switch (info1) {
498             case PICODATA_ITEMINFO1_CMD_CONTEXT:
499             case PICODATA_ITEMINFO1_CMD_VOICE:
500             case PICODATA_ITEMINFO1_CMD_MARKER:
501             case PICODATA_ITEMINFO1_CMD_PLAY:
502             case PICODATA_ITEMINFO1_CMD_SAVE:
503             case PICODATA_ITEMINFO1_CMD_UNSAVE:
504             case PICODATA_ITEMINFO1_CMD_PROSDOMAIN:
505             case PICODATA_ITEMINFO1_CMD_PHONEME:
506                 len = picoos_strlen((picoos_char*)str);
507                 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
508                     tok->outBuf[tok->outWritePos++] = itemType;
509                     tok->outBuf[tok->outWritePos++] = info1;
510                     tok->outBuf[tok->outWritePos++] = info2;
511                     tok->outBuf[tok->outWritePos++] = len;
512                     for (i=0; i<len; i++) {
513                         tok->outBuf[tok->outWritePos++] = str[i];
514                     }
515                 }
516                 else {
517                     PICODBG_WARN(("tok_putItem: output buffer too small"));
518                 }
519                 break;
520             case PICODATA_ITEMINFO1_CMD_IGNSIG:
521             case PICODATA_ITEMINFO1_CMD_IGNORE:
522                 if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
523                     tok->outBuf[tok->outWritePos++] = itemType;
524                     tok->outBuf[tok->outWritePos++] = info1;
525                     tok->outBuf[tok->outWritePos++] = info2;
526                     tok->outBuf[tok->outWritePos++] = 0;
527                 }
528                 else {
529                     PICODBG_WARN(("tok_putItem: output buffer too small"));
530                 }
531                 break;
532             case PICODATA_ITEMINFO1_CMD_SPEED:
533             case PICODATA_ITEMINFO1_CMD_PITCH:
534             case PICODATA_ITEMINFO1_CMD_VOLUME:
535             case PICODATA_ITEMINFO1_CMD_SPELL:
536             case PICODATA_ITEMINFO1_CMD_SIL:
537             case PICODATA_ITEMINFO1_CMD_SPEAKER:
538                 if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
539                     tok->outBuf[tok->outWritePos++] = itemType;
540                     tok->outBuf[tok->outWritePos++] = info1;
541                     tok->outBuf[tok->outWritePos++] = info2;
542                     tok->outBuf[tok->outWritePos++] = 2;
543                     tok->outBuf[tok->outWritePos++] = val % 256;
544                     tok->outBuf[tok->outWritePos++] = val / 256;
545                 }
546                 else {
547                     PICODBG_WARN(("tok_putItem: output buffer too small"));
548                 }
549                 break;
550             default:
551                 PICODBG_WARN(("tok_putItem: unknown command type"));
552             }
553             break;
554         case PICODATA_ITEM_TOKEN:
555             len = picoos_strlen((picoos_char*)str);
556             if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
557                 tok->outBuf[tok->outWritePos++] = itemType;
558                 tok->outBuf[tok->outWritePos++] = info1;
559                 tok->outBuf[tok->outWritePos++] = info2;
560                 tok->outBuf[tok->outWritePos++] = len;
561                 for (i=0; i<len; i++) {
562                     tok->outBuf[tok->outWritePos++] = str[i];
563                 }
564             }
565             else {
566                 PICODBG_WARN(("tok_putItem: output buffer too small"));
567             }
568             break;
569         default:
570             PICODBG_WARN(("tok_putItem: unknown item type"));
571         }
572     }
573 }
574 
575 
/**
 * Writes an item given in raw form (head fields plus content bytes) to the
 * internal output buffer.
 *
 * Nothing is written if the type is rejected by is_valid_itemtype(), or if
 * the item would not fit into the remaining space of tok->outBuf; the
 * latter case used to write past the end of the buffer.
 *
 * @param this   processing unit (not used here)
 * @param tok    tokenizer state owning the output buffer
 * @param type   item type
 * @param info1  item info1
 * @param info2  item info2
 * @param len    number of content bytes in data
 * @param data   content bytes
 */
static void tok_putItem2 (picodata_ProcessingUnit this,  tok_subobj_t * tok,
                          picoos_uint8 type,
                          picoos_uint8 info1, picoos_uint8 info2,
                          picoos_uint8 len,
                          picoos_uint8 data[])
{
    picoos_int32 i;

    if (!is_valid_itemtype(type)) {
        return;
    }
    /* 4 head bytes + content must stay inside outBuf */
    if (tok->outWritePos + 4 + len > OUT_BUF_SIZE) {
        PICODBG_WARN(("tok_putItem2: output buffer too small"));
        return;
    }
    tok->outBuf[tok->outWritePos++] = type;
    tok->outBuf[tok->outWritePos++] = info1;
    tok->outBuf[tok->outWritePos++] = info2;
    tok->outBuf[tok->outWritePos++] = len;
    for (i=0; i<len; i++) {
        tok->outBuf[tok->outWritePos++] = data[i];
    }
}
594 
595 
tok_markupTagId(picoos_uchar tagId[])596 static MarkupId tok_markupTagId (picoos_uchar tagId[])
597 {
598     if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) {
599         tagId+=5;
600     }
601     if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) {
602         return MIIgnore;
603     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) {
604         return MISpeed;
605     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) {
606         return MIPitch;
607     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) {
608         return MIVolume;
609     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) {
610         return MISpeaker;
611     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) {
612         return MIVoice;
613     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) {
614         return MIPreprocContext;
615     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) {
616         return MIMarker;
617     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) {
618         return MIPlay;
619     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) {
620         return MIUseSig;
621     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) {
622         return MIGenFile;
623     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) {
624         return MISentence;
625     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) {
626         return MIParagraph;
627     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) {
628         return MIBreak;
629     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) {
630         return MISpell;
631     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) {
632         return MIPhoneme;
633     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) {
634         return MIItem;
635     } else {
636         return MIDummyEnd;
637     }
638 }
639 
640 
/**
 * Forces an unsigned value into the range [min, max].
 * If the value lies outside, a warning naming the value and its kind is
 * raised on the unit's exception manager and the value is replaced by the
 * nearer limit.
 * @param this       processing unit whose exception manager gets the warning
 * @param value      value to check; overwritten if out of range
 * @param min        lowest allowed value
 * @param max        highest allowed value
 * @param valueType  description of the value, used in the warning text
 */
extern void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[])
{
    if (!(((*value) < min) || ((*value) > max))) {
        return;     /* within limits, nothing to do */
    }

    picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType);
    (*value) = ((*value) < min) ? min : max;
}
652 
653 
654 
/**
 * Forces a floating-point value into the range [min, max].
 * If the value lies outside, a warning naming the value and its kind is
 * raised on the unit's exception manager and the value is replaced by the
 * nearer limit. A value for which neither comparison holds is left alone.
 * @param this       processing unit whose exception manager gets the warning
 * @param value      value to check; overwritten if out of range
 * @param min        lowest allowed value
 * @param max        highest allowed value
 * @param valueType  description of the value, used in the warning text
 */
extern void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[])
{
    if (!(((*value) < min) || ((*value) > max))) {
        return;     /* not outside the limits, nothing to do */
    }

    picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType);
    (*value) = ((*value) < min) ? min : max;
}
666 
667 
668 #define VAL_STR_LEN 21
669 
tok_interpretMarkup(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_bool isStartTag,MarkupId mId)670 static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
671 {
672     picoos_bool done;
673     picoos_int32 ival;
674     picoos_uint32 uval;
675     picoos_int32 ival2;
676     picoos_uchar valStr[VAL_STR_LEN];
677     picoos_uchar valStr2[VAL_STR_LEN];
678     picoos_uchar valStr3[VAL_STR_LEN];
679     picoos_int32 i2;
680     picoos_uint32 dur;
681     picoos_bool done1;
682     picoos_bool paramFound;
683     picoos_uint8 type, info1, info2;
684     picoos_uint8 data[256];
685     picoos_int32 pos, n, len;
686     picoos_uchar part[10];
687 
688     done = FALSE;
689     switch (mId) {
690         case MIIgnore:
691             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
692                 tok_startIgnore(tok);
693                 done = TRUE;
694             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
695                 tok_endIgnore(tok);
696                 done = TRUE;
697             }
698             break;
699         case MISpeed:
700             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
701                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
702                     tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor");
703                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
704                 } else {
705                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
706                     tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed");
707                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
708                 }
709                 done = TRUE;
710             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
711                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
712                 done = TRUE;
713             }
714             break;
715         case MIPitch:
716             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
717                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
718                     tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor");
719                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
720                 } else {
721                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
722                     tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch");
723                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
724                 }
725                 done = TRUE;
726             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
727                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
728                 done = TRUE;
729             }
730             break;
731         case MIVolume:
732             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
733                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
734                     tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor");
735                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
736                 } else {
737                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
738                     tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume");
739                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
740                 }
741                 done = TRUE;
742             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
743                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
744                 done = TRUE;
745             }
746             break;
747         case MISpeaker:
748             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
749                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
750                     tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor");
751                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
752                 } else {
753                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
754                     tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume");
755                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
756                 }
757                 done = TRUE;
758             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
759                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
760                 done = TRUE;
761             }
762             break;
763 
764         case MIVoice:
765             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
766                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
767                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
768                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
769                 done = TRUE;
770             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
771                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
772                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
773                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
774                 done = TRUE;
775             }
776             break;
777         case MIPreprocContext:
778             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
779                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
780                 done = TRUE;
781             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
782                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
783                 done = TRUE;
784             }
785             break;
786         case MIMarker:
787             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
788                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
789                 done = TRUE;
790             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
791                 done = TRUE;
792             }
793             break;
794         case MISentence:
795             if (isStartTag) {
796                 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
797                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
798                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
799                 done = TRUE;
800             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
801                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
802                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
803                 done = TRUE;
804             }
805             break;
806         case MIParagraph:
807             if (isStartTag) {
808                 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
809                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
810                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
811                 done = TRUE;
812             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
813                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
814                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
815                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
816                 done = TRUE;
817             }
818             break;
819         case MIBreak:
820             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
821                 tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
822                 tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time");
823                 if (done1) {
824                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
825                     done = TRUE;
826                 }
827             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
828                 done = TRUE;
829             }
830             break;
831         case MISpell:
832             if (isStartTag) {
833                 if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
834                     if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
835                         uval = SPELL_WITH_PHRASE_BREAK;
836                     } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
837                         uval = SPELL_WITH_SENTENCE_BREAK;
838                     } else {
839                         tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
840                         tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time");
841                         if (done1) {
842                             done = TRUE;
843                         }
844                     }
845                 } else {
846                     uval = SPELL_WITH_PHRASE_BREAK;
847                 }
848                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)"");
849                 done = TRUE;
850             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
851                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
852                 done = TRUE;
853             }
854             break;
855         case MIGenFile:
856             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
857                 if (tok->saveFile[0] != 0) {
858                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
859                                picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
860                    tok->saveFile[0] = 0;
861                 }
862                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
863                             picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal,  /*input*/FALSE), 0, tok->markupParams[0].paramVal);
864                 picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
865                 done = TRUE;
866             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
867                 if (tok->saveFile[0] != 0) {
868                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
869                                 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
870                     tok->saveFile[0] = 0;
871                 }
872                 done = TRUE;
873             }
874             break;
875         case MIPlay:
876             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
877                 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
878                     tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
879                     tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
880                     tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
881                     tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
882                     tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
883                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
884                                 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
885                     tok_startIgnore(tok);
886                 } else {
887                     if (tok->ignLevel > 0) {
888                         tok_startIgnore(tok);
889                     } else {
890                        picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
891                     }
892                 }
893                 done = TRUE;
894             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
895                 tok_endIgnore(tok);
896                 done = TRUE;
897             }
898             break;
899         case MIUseSig:
900             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
901                 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
902                     tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
903                     tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
904                     tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
905                     tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
906                     tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
907                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
908                                 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
909                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
910                 } else {
911                     if (tok->ignLevel <= 0) {
912                         picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
913                     }
914                 }
915                 done = TRUE;
916             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
917                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
918                 done = TRUE;
919             }
920             break;
921         case MIPhoneme:
922             i2 = 0;
923             if (isStartTag) {
924                 if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
925                     if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
926                         && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
927                         i2 = 1;
928                     }
929                     if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
930                         tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
931                             PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
932                         done = TRUE;
933                     } else {
934                         PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
935                         picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
936                         done = TRUE;
937                     }
938                 } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
939                     if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
940                         && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
941                         i2 = 1;
942                     }
943                     if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
944                         tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
945                             PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
946                         done = TRUE;
947                     }
948                     else {
949                         PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
950                         picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
951                         done = TRUE;
952                     }
953                 }
954             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
955                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
956                     PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)"");
957                 done = TRUE;
958             }
959             break;
960         case MIItem:
961             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
962                               tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
963                               tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
964                               tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
965                   picoos_int32 len2, n2;
966                   type = picoos_atoi(tok->markupParams[0].paramVal);
967                   info1 = picoos_atoi(tok->markupParams[1].paramVal);
968                   info2 = picoos_atoi(tok->markupParams[2].paramVal);
969                   n = 0; n2 = 0;
970                   len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
971                   while (n<len2) {
972                       while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
973                           n++;
974                       }
975                       tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
976                       n++;
977                       n2++;
978                   }
979                   if (is_valid_itemtype(type)) {
980                       done = TRUE;
981                       len = 0;
982                       pos = 0;
983                       picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
984                                           &pos, ',', part, 10, &done1);
985                       while (done && done1) {
986                           n = picoos_atoi(part);
987                           if ((n>=0) && (n<256) && (len<256)) {
988                               data[len++] = n;
989                           }
990                           else {
991                               done = FALSE;
992                           }
993                           picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
994                                           &pos, ',', part, 10, &done1);
995                       }
996                       if (done) {
997                           tok_putItem2(this, tok, type, info1, info2, len, data);
998                       }
999                   }
1000                   else {
1001                       done = FALSE;
1002                   }
1003             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
1004                 done = TRUE;
1005             }
1006             break;
1007     default:
1008         break;
1009     }
1010     if (!done) {
1011         tok->markupTagErr = MEInterprete;
1012     }
1013     if (isStartTag) {
1014         tok->markupLevel[mId]++;
1015     } else if ((tok->markupLevel[mId] > 0)) {
1016         tok->markupLevel[mId]--;
1017     }
1018 }
1019 
1020 
/**
 * Tells whether ch is acceptable inside a markup attribute name.
 *
 * ASCII letters are always accepted; ASCII digits are accepted only when ch
 * is not the first character of the name.
 *
 * @param ch     character to classify
 * @param first  non-zero if ch would be the first character of the name
 * @return non-zero if ch is accepted, zero otherwise
 */
static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first)
{
    const picoos_bool isUpper = (ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z');
    const picoos_bool isLower = (ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z');
    const picoos_bool isDigit = (ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9');

    return (isUpper || isLower || (!first && isDigit));
}
1027 
1028 
1029 
/**
 * Tells whether ch is acceptable inside a markup tag name.
 *
 * Accepts everything tok_attrChar accepts and, in any position but the
 * first, the colon character.
 *
 * @param ch     character to classify
 * @param first  non-zero if ch would be the first character of the name
 * @return non-zero if ch is accepted, zero otherwise
 */
static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first)
{
    picoos_bool accepted = tok_attrChar(ch, first);

    if (!accepted) {
        accepted = (!first) && (ch == (picoos_uchar)':');
    }
    return accepted;
}
1034 
1035 
/**
 * Records whether an attribute name is the "file" keyword.
 *
 * @param name    attribute name to compare against KWFile (via tok_strEqual)
 * @param isFile  receives the result of the comparison
 */
static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile)
{
    const picoos_bool nameIsFile = tok_strEqual(name, KWFile);

    *isFile = nameIsFile;
}
1040 
1041 /* *****************************************************************************/
1042 
/**
 * Appends the bytes of str to the simple token under construction and sets
 * the token's type and subtype.
 *
 * Before each byte is stored, the fill level is checked: once tok->tokenPos
 * has reached IN_BUF_SIZE a warning is raised and tok_treatSimpleToken is
 * called (presumably emitting the token collected so far and making room
 * again -- its body is not part of this section).
 *
 * @param this     processing unit, used for raising the warning
 * @param tok      tokenizer sub-object holding tokenStr / tokenPos
 * @param str      NUL-terminated bytes to append (may be empty)
 * @param type     token type stored in tok->tokenType
 * @param subtype  token subtype stored in tok->tokenSubType
 */
static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
{
    /* an empty string has length 0, so it simply skips the loop */
    const int nBytes = picoos_strlen((picoos_char*)str);
    int idx = 0;

    while (idx < nBytes) {
        if (tok->tokenPos >= IN_BUF_SIZE) {
            picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment");
            tok_treatSimpleToken(this, tok);
        }
        tok->tokenStr[tok->tokenPos] = str[idx];
        tok->tokenPos++;
        idx++;
    }

    tok->tokenSubType = subtype;
    tok->tokenType = type;
}
1061 
1062 
tok_putToMarkup(picodata_ProcessingUnit this,tok_subobj_t * tok,picoos_uchar str[])1063 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
1064 {
1065     picoos_int32 i, len;
1066     picoos_uint8 ok;
1067 
1068     tok->markupTagErr = MENone;
1069     len = picoos_strlen((picoos_char*)str);
1070     for (i = 0; i< len; i++) {
1071         if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) {
1072             if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1073                 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long");
1074             }
1075             tok->markupState = MSErrorTooLong;
1076         } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
1077         } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1078             tok->markupState = MSGotEnd;
1079         } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1080             if (tok->markupTagType == MTEnd) {
1081                 tok->markupTagErr = MEUnexpectedChar;
1082                 tok->markupState = MSError;
1083             } else {
1084                 tok->markupTagType = MTEmpty;
1085                 tok->markupState = MSGotEndSlash;
1086             }
1087         } else {
1088             switch (tok->markupState) {
1089                 case MSNotInMarkup:
1090                     if (str[i] == (picoos_uchar)'<') {
1091                         tok_clearMarkupParams(tok->markupParams);
1092                         tok->nrMarkupParams = 0;
1093                         tok->strPos = 0;
1094                         tok->markupTagType = MTStart;
1095                         tok->markupState = MSGotStart;
1096                     } else {
1097                         tok->markupTagErr = MEMissingStart;
1098                         tok->markupState = MSError;
1099                     }
1100                     break;
1101                 case MSGotStart:
1102                     if (str[i] == (picoos_uchar)'/') {
1103                         tok->markupTagType = MTEnd;
1104                         tok->markupState = MSExpectingmarkupTagName;
1105                     } else if (str[i] == (picoos_uchar)' ') {
1106                         tok->markupState = MSExpectingmarkupTagName;
1107                     } else if (tok_idChar(str[i],TRUE)) {
1108                         tok->markupTagType = MTStart;
1109                         tok->markupTagName[tok->strPos] = str[i];
1110                         tok->strPos++;
1111                         tok->markupTagName[tok->strPos] = 0;
1112                         tok->markupState = MSInmarkupTagName;
1113                     } else {
1114                         tok->markupTagErr = MEUnexpectedChar;
1115                         tok->markupState = MSError;
1116                     }
1117                     break;
1118                 case MSInmarkupTagName:   case MSExpectingmarkupTagName:
1119                     if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
1120                         tok->markupTagName[tok->strPos] = str[i];
1121                         tok->strPos++;
1122                         tok->markupTagName[(tok->strPos)] = 0;
1123                         tok->markupState = MSInmarkupTagName;
1124                     } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
1125                         tok->markupState = MSGotmarkupTagName;
1126                         picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
1127                         tok->strPos = 0;
1128                     } else {
1129                         tok->markupTagErr = MEIdent;
1130                         tok->markupState = MSError;
1131                     }
1132                     break;
1133                 case MSGotmarkupTagName:   case MSGotAttrValue:
1134                     if (tok_attrChar(str[i], TRUE)) {
1135                         if (tok->markupTagType == MTEnd) {
1136                             tok->markupTagErr = MEUnexpectedChar;
1137                             tok->markupState = MSError;
1138                         } else {
1139                             if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1140                                 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1141                                 tok->strPos++;
1142                                 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1143                             } else {
1144                                 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring");
1145                             }
1146                             tok->markupState = MSInAttrName;
1147                         }
1148                     } else {
1149                         tok->markupTagErr = MEUnexpectedChar;
1150                         tok->markupState = MSError;
1151                     }
1152                     break;
1153                 case MSInAttrName:
1154                     if (tok_attrChar(str[i], FALSE)) {
1155                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1156                             tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1157                             tok->strPos++;
1158                             tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1159                         }
1160                         tok->markupState = MSInAttrName;
1161                     } else if (str[i] == (picoos_uchar)' ') {
1162                         picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1163                         tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1164                         tok->markupState = MSGotAttrName;
1165                     } else if (str[i] == (picoos_uchar)'=') {
1166                         picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1167                         tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1168                         tok->markupState = MSGotEqual;
1169                     } else {
1170                         tok->markupTagErr = MEMissingEqual;
1171                         tok->markupState = MSError;
1172                     }
1173                     break;
1174                 case MSGotAttrName:
1175                     if (str[i] == (picoos_uchar)'=') {
1176                         tok->markupState = MSGotEqual;
1177                     } else {
1178                         tok->markupTagErr = MEMissingEqual;
1179                         tok->markupState = MSError;
1180                     }
1181                     break;
1182                 case MSGotEqual:
1183                     if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) {
1184                         tok->strDelim = str[i];
1185                         tok->strPos = 0;
1186                         tok->markupState = MSInAttrValue;
1187                     } else {
1188                         tok->markupTagErr = MEMissingQuote;
1189                         tok->markupState = MSError;
1190                     }
1191                     break;
1192                 case MSInAttrValue:
1193                     if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
1194                         tok->markupState = MSInAttrValueEscaped;
1195                     } else if (str[i] == tok->strDelim) {
1196                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1197                             tok->nrMarkupParams++;
1198                         }
1199                         tok->strPos = 0;
1200                         tok->markupState = MSGotAttrValue;
1201                     } else {
1202                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1203                             tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1204                             tok->strPos++;
1205                             tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1206                         }
1207                         tok->markupState = MSInAttrValue;
1208                     }
1209                     break;
1210                 case MSInAttrValueEscaped:
1211                     if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1212                         tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1213                         tok->strPos++;
1214                         tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1215                     }
1216                     tok->markupState = MSInAttrValue;
1217                     break;
1218                 case MSGotEndSlash:
1219                     if (str[i] == (picoos_uchar)'>') {
1220                         tok->markupState = MSGotEnd;
1221                     } else {
1222                         tok->markupTagErr = MEUnexpectedChar;
1223                         tok->markupState = MSError;
1224                     }
1225                     break;
1226             default:
1227                 tok->markupTagErr = MEUnexpectedChar;
1228                 tok->markupState = MSError;
1229                 break;
1230             }
1231         }
1232         tok->markupStr[tok->markupPos] = str[i];
1233         tok->markupPos++;
1234         tok->markupStr[tok->markupPos] = 0;
1235     }
1236     /*
1237     PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
1238     */
1239 }
1240 
1241 /* *****************************************************************************/
1242 
/**
 * Re-processes the collected markup text as ordinary input.
 *
 * The markup parser and the UTF-8 assembly state are reset, then every byte
 * stored in tok->markupStr (tok->markupPos bytes) is passed to tok_treatChar
 * with markup handling switched off. Finally the markup buffer is emptied.
 *
 * @param this  processing unit
 * @param tok   tokenizer sub-object
 */
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    picoos_int32 pos = 0;

    tok->markupState = MSNotInMarkup;
    tok->utflen = 0;
    tok->utfpos = 0;

    while (pos < tok->markupPos) {
        tok_treatChar(this, tok, tok->markupStr[pos], FALSE);
        pos++;
    }

    tok->strPos = 0;
    tok->markupPos = 0;
}
1256 
1257 
/**
 * Handles a completely parsed markup tag.
 *
 * Tag names that tok_markupTagId maps to MIDummyEnd are replayed as plain
 * text. For any other tag that parsed without error, the pending simple
 * token is finished (unless it is of type space or undefined), a blank is
 * put into the token buffer, and the tag is interpreted: as start tag for
 * MTStart/MTEmpty, and -- after clearing the parameters -- as end tag for
 * MTEnd/MTEmpty. If an error is set before or during interpretation, a
 * warning is raised (unless tok->aborted is set) and the markup text is
 * replayed as plain text. In all cases the markup parser is reset at the end.
 *
 * @param this  processing unit
 * @param tok   tokenizer sub-object
 */
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    MarkupId tagId;

    if (tok_markupTagId(tok->markupTagName) == MIDummyEnd) {
        tok_treatMarkupAsSimpleToken(this, tok);
    } else {
        if (tok->markupTagErr == MENone) {
            tok->markupState = MSNotInMarkup;
            if (!((tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED))) {
                tok_treatSimpleToken(this, tok);
            }
            tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);
            tagId = tok_markupTagId(tok->markupTagName);
            /* an empty tag (<x/>) is interpreted as start and as end tag */
            if ((tok->markupTagType == MTEmpty) || (tok->markupTagType == MTStart)) {
                tok_interpretMarkup(this, tok, TRUE, tagId);
            }
            if ((tok->markupTagType == MTEmpty) || (tok->markupTagType == MTEnd)) {
                tok_clearMarkupParams(tok->markupParams);
                tok->nrMarkupParams = 0;
                tok_interpretMarkup(this, tok, FALSE, tagId);
            }
        }
        /* not an else: the error may have been set during interpretation */
        if (tok->markupTagErr != MENone) {
            if (!tok->aborted) {
                picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
            }
            tok_treatMarkupAsSimpleToken(this, tok);
        }
    }

    tok->strPos = 0;
    tok->markupPos = 0;
    tok->markupState = MSNotInMarkup;
}
1292 
1293 
1294 
/**
 * feed one input byte to the tokenizer
 *
 * A NULLC byte finishes the pending simple token and emits a FLUSH command
 * item. Any other byte is appended to the utf8 assembly buffer; nothing more
 * happens until a complete utf8 character is available. A complete character
 * is classified via the graph table and then either handed to the markup
 * parser or appended to the current simple token.
 *
 * @param this           tokenizer processing unit
 * @param tok            tokenizer sub-object
 * @param ch             next input byte
 * @param markupHandling caller's request to interpret markup; only honoured
 *                       if tok->markupHandlingMode is MARKUP_HANDLING_ENABLED
 */
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
{
    pico_tokenType chType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    pico_tokenSubType chSubType = -1;
    picoos_uint8 rawType;
    picoos_int32 graphId;
    picoos_bool useMarkup;

    if (NULLC == ch) {
        tok_treatSimpleToken(this, tok);
        tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
        return;
    }

    switch (tok_putToUtf(tok, ch)) {
        case UTF_CHAR_COMPLETE:
            /* handled below */
            break;
        case UTF_CHAR_MALFORMED:
            /* drop the bytes collected so far */
            tok->utfpos = 0;
            tok->utflen = 0;
            return;
        case UTF_CHAR_INCOMPLETE:
            return;
        default:
            return;
    }

    useMarkup = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));

    /* classify the character; type/subtype keep their defaults
       (UNDEFINED / -1) whenever the graph table has no answer */
    graphId = picoktab_graphOffset(tok->graphTab, tok->utf);
    if (graphId > 0) {
        if (picoktab_getIntPropTokenType(tok->graphTab, graphId, &rawType)) {
            chType = (pico_tokenType)rawType;
            if (PICODATA_ITEMINFO1_TOKTYPE_LETTERV == chType) {
                chType = PICODATA_ITEMINFO1_TOKTYPE_LETTER;
            }
        }
        (void) picoktab_getIntPropTokenSubType(tok->graphTab, graphId, &chSubType);
    } else {
        chType = (ch <= (picoos_uchar)' ') ? PICODATA_ITEMINFO1_TOKTYPE_SPACE
                                           : PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
        chSubType = -1;
    }

    /* count EOLs that are separated only by characters <= ' ' */
    if (ch > (picoos_uchar)' ') {
        tok->nrEOL = 0;
    } else if (ch == EOL) {
        tok->nrEOL++;
    }

    if (useMarkup && ((tok->markupState != MSNotInMarkup) || (ch == (picoos_uchar)'<'))) {
        /* either continuing a markup or a '<' is opening a new one */
        const picoos_bool wasInMarkup = (tok->markupState != MSNotInMarkup);

        tok_putToMarkup(this, tok, tok->utf);
        if (wasInMarkup) {
            if (tok->markupState >= MSError) {
                tok_treatMarkupAsSimpleToken(this, tok);
            } else if (tok->markupState == MSGotEnd) {
                tok_treatMarkup(this, tok);
            }
        }
    } else if (PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED == chType) {
        /* unclassifiable character: it only terminates the pending token */
        tok_treatSimpleToken(this, tok);
    } else {
        if ((chType != tok->tokenType) || (chType == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (chSubType != tok->tokenSubType)) {
            /* type change (or single-character token type): close pending token */
            tok_treatSimpleToken(this, tok);
        } else if ((ch == EOL) && (tok->nrEOL == 2)) {
            /* second EOL inside the same token: insert a "." token of type CHAR */
            tok_treatSimpleToken(this, tok);
            tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
            tok_treatSimpleToken(this, tok);
        }
        tok_putToSimpleToken(this, tok, tok->utf, chType, chSubType);
    }

    /* the utf8 assembly buffer has been consumed */
    tok->utfpos = 0;
    tok->utflen = 0;
}
1365 
1366 
/**
 * finish the simple token collected so far and reset the token state
 *
 * If a markup is still being parsed, it is first converted to simple-token
 * text (with an "unfinished markup tag" warning where applicable) and the
 * function calls itself again to finish the resulting token. Otherwise a
 * non-empty token is written as a TOKEN item, unless tok->ignLevel is
 * positive and the token is not of type SPACE.
 *
 * @param this tokenizer processing unit
 * @param tok  tokenizer sub-object
 */
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
{
    /* terminate the token string if there is still room for the terminator */
    if (tok->tokenPos < IN_BUF_SIZE) {
        tok->tokenStr[tok->tokenPos] = 0;
    }

    if (MSNotInMarkup == tok->markupState) {
        const picoos_bool emitAllowed =
            (tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE);

        if ((tok->tokenPos > 0) && emitAllowed) {
            tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
        }
    } else {
        /* warn only if not aborted, a tag name was already read and the
           name does not map to MIDummyEnd */
        if (!(tok->aborted)) {
            if (tok->markupState >= MSGotmarkupTagName) {
                if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) {
                    picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
                }
            }
        }
        tok_treatMarkupAsSimpleToken(this, tok);
        tok_treatSimpleToken(this, tok);
    }

    /* start over with an empty, untyped token */
    tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
    tok->tokenSubType =  -1;
    tok->tokenPos = 0;
}
1385 
1386 /* *****************************************************************************/
1387 
tokReset(register picodata_ProcessingUnit this,picoos_int32 r_mode)1388 static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 r_mode)
1389 {
1390     tok_subobj_t * tok;
1391     MarkupId mId;
1392 
1393     if (NULL == this || NULL == this->subObj) {
1394         return PICO_ERR_OTHER;
1395     }
1396     tok = (tok_subobj_t *) this->subObj;
1397 
1398     tok->ignLevel = 0;
1399 
1400     tok->utfpos = 0;
1401     tok->utflen = 0;
1402 
1403     tok_clearMarkupParams(tok->markupParams);
1404     tok->nrMarkupParams = 0;
1405     tok->markupState = MSNotInMarkup;
1406     tok->markupPos = 0;
1407     for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) {
1408         tok->markupLevel[mId] = 0;
1409     }
1410     tok->markupTagName[0] = 0;
1411     tok->markupTagType = MTNone;
1412     tok->markupTagErr = MENone;
1413 
1414     tok->strPos = 0;
1415     tok->strDelim = 0;
1416     tok->isFileAttr = FALSE;
1417 
1418     tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1419     tok->tokenSubType =  -1;
1420     tok->tokenPos = 0;
1421 
1422     tok->nrEOL = 0;
1423 
1424 
1425     tok->markupHandlingMode = TRUE;
1426     tok->aborted = FALSE;
1427 
1428     tok->start = TRUE;
1429 
1430     tok->outReadPos = 0;
1431     tok->outWritePos = 0;
1432 
1433     tok->saveFile[0] = 0;
1434 
1435 
1436     tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
1437 
1438     tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
1439     PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));
1440 
1441     tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
1442     PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));
1443 
1444     tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
1445     PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));
1446 
1447 
1448 
1449     return PICO_OK;
1450 }
1451 
tokInitialize(register picodata_ProcessingUnit this,picoos_int32 r_mode)1452 static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 r_mode)
1453 {
1454 /*
1455 
1456     tok_subobj_t * tok;
1457 
1458     if (NULL == this || NULL == this->subObj) {
1459         return PICO_ERR_OTHER;
1460     }
1461     tok = (tok_subobj_t *) this->subObj;
1462 */
1463     return tokReset(this, r_mode);
1464 }
1465 
1466 
tokTerminate(register picodata_ProcessingUnit this)1467 static pico_status_t tokTerminate(register picodata_ProcessingUnit this)
1468 {
1469     return PICO_OK;
1470 }
1471 
1472 static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput);
1473 
tokSubObjDeallocate(register picodata_ProcessingUnit this,picoos_MemoryManager mm)1474 static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this,
1475         picoos_MemoryManager mm)
1476 {
1477 
1478     if (NULL != this) {
1479         picoos_deallocate(this->common->mm, (void *) &this->subObj);
1480     }
1481     mm = mm;        /* avoid warning "var not used in this function"*/
1482     return PICO_OK;
1483 }
1484 
picotok_newTokenizeUnit(picoos_MemoryManager mm,picoos_Common common,picodata_CharBuffer cbIn,picodata_CharBuffer cbOut,picorsrc_Voice voice)1485 picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common,
1486         picodata_CharBuffer cbIn, picodata_CharBuffer cbOut,
1487         picorsrc_Voice voice)
1488 {
1489     tok_subobj_t * tok;
1490     picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
1491     if (this == NULL) {
1492         return NULL;
1493     }
1494     this->initialize = tokInitialize;
1495     PICODBG_DEBUG(("set this->step to tokStep"));
1496     this->step = tokStep;
1497     this->terminate = tokTerminate;
1498     this->subDeallocate = tokSubObjDeallocate;
1499     this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t));
1500     if (this->subObj == NULL) {
1501         picoos_deallocate(mm, (void *)&this);
1502         return NULL;
1503     }
1504     tok = (tok_subobj_t *) this->subObj;
1505     tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
1506     if (NULL == tok->transducer) {
1507         tokSubObjDeallocate(this,mm);
1508         picoos_deallocate(mm, (void *)&this);
1509         return NULL;
1510     }
1511     tokInitialize(this, PICO_RESET_FULL);
1512     return this;
1513 }
1514 
1515 /**
1516  * fill up internal buffer, try to locate token, write token to output
1517  */
tokStep(register picodata_ProcessingUnit this,picoos_int16 mode,picoos_uint16 * numBytesOutput)1518 picodata_step_result_t tokStep(register picodata_ProcessingUnit this,
1519         picoos_int16 mode, picoos_uint16 * numBytesOutput)
1520 {
1521     register tok_subobj_t * tok;
1522 
1523     if (NULL == this || NULL == this->subObj) {
1524         return PICODATA_PU_ERROR;
1525     }
1526     tok = (tok_subobj_t *) this->subObj;
1527 
1528     mode = mode;        /* avoid warning "var not used in this function"*/
1529 
1530     *numBytesOutput = 0;
1531     while (1) { /* exit via return */
1532         picoos_int16 ch;
1533 
1534         if ((tok->outWritePos - tok->outReadPos) > 0) {
1535             if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
1536                 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
1537                     (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
1538                 tok->outReadPos += *numBytesOutput;
1539                 if (tok->outWritePos == tok->outReadPos) {
1540                     tok->outWritePos = 0;
1541                     tok->outReadPos = 0;
1542                 }
1543             }
1544             else {
1545                 return PICODATA_PU_OUT_FULL;
1546             }
1547 
1548         }
1549         else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) {
1550             PICODBG_DEBUG(("read in %c", (picoos_char) ch));
1551             tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);
1552         }
1553         else {
1554             return PICODATA_PU_IDLE;
1555         }
1556     }
1557 }
1558 
1559 #ifdef __cplusplus
1560 }
1561 #endif
1562 
1563 /* end */
1564