1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picowa.h 18 * 19 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 20 * All rights reserved. 21 * 22 * History: 23 * - 2009-04-20 -- initial version 24 * 25 */ 26 27 28 /** 29 * @addtogroup picowa 30 * ---------------------------------------------------\n 31 * <b> Pico Word Analysis </b>\n 32 * ---------------------------------------------------\n 33 itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content 34 in the following 35 36 items input\n 37 =========== 38 39 processed by wa: 40 - WORDGRAPH(NA,NA)graph 41 - OTHER(NA,NA)string 42 43 unprocessed: 44 - all other item types are forwarded through the PU without modification: 45 - PUNC 46 - CMD 47 48 49 minimal input size (before processing starts)\n 50 ================== 51 52 processing (ie. lex lookup and POS prediction) is possible with 53 - one item 54 55 56 items processed and output\n 57 ========================== 58 59 processing an input WORDGRAPH results in one of the following items: 60 - WORDGRAPH(POSes,NA)graph 61 - graph not in lex, POSes determined with dtree, or 62 - graph in lex - single entry without phone (:G2P), POSes from lex 63 - WORDINDEX(POSes,NA)pos1|ind1...posN|indN 64 - graph in lex - {1,4} entries with phone, pos1...posN from lex, 65 {1,4} lexentries indices in content, POSes combined with map table 66 in klex 67 68 processing an input OTHER results in the item being skipped (in the 69 future this can be extended to e.g. spelling) 70 71 see picotok.h for PUNC and CMD 72 73 - POSes %d 74 - is the superset of all single POS and POS combinations defined 75 in the lingware as unique symbol 76 - graph, len>0, utf8 graphemes, %s 77 - pos1|ind1, pos2|ind2, ..., posN|indN 78 - pos? are the single, unambiguous POS only, one byte %d 79 - ind? are the lexentry indices, three bytes %d %d %d 80 81 82 lexicon (system lexicon, but must also be ensured for user lexica)\n 83 ======= 84 85 - POS GRAPH PHON, all mandatory, but 86 - * PHON can be an empty string -> no pronunciation in the resulting TTS output 87 - * PHON can be :G2P -> use G2P later to add pronunciation 88 - (POS,GRAPH) is a uniq key (only one entry allowed) 89 - (GRAPH) is almost a uniq key (2-4 entries with the same GRAPH, and 90 differing POS and differing PHON possible) 91 - for one graph we can have 2-4 solutions from the lex which all 92 need to be passed on the the next PU 93 - in this case GRAPH, POS, and PHON all must be available in lex 94 - in this case for each entry only a non-ambiguous, unique POS ID 95 is possible) 96 97 other limitations\n 98 ================= 99 100 - item size: header plus len=256 (valid for Pico in general) 101 - wa uses one item context only -> internal buffer set to 256+4 102 */ 103 104 105 #ifndef PICOWA_H_ 106 #define PICOWA_H_ 107 108 #include "picoos.h" 109 #include "picodata.h" 110 #include "picorsrc.h" 111 112 #ifdef __cplusplus 113 extern "C" { 114 #endif 115 #if 0 116 } 117 #endif 118 119 120 /* maximum length of an item incl. head for input and output buffers */ 121 #define PICOWA_MAXITEMSIZE 260 122 123 124 picodata_ProcessingUnit picowa_newWordAnaUnit( 125 picoos_MemoryManager mm, 126 picoos_Common common, 127 picodata_CharBuffer cbIn, 128 picodata_CharBuffer cbOut, 129 picorsrc_Voice voice); 130 131 #ifdef __cplusplus 132 } 133 #endif 134 135 #endif /*PICOWA_H_*/ 136