1 /*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 /**
17 * @file picokdt.c
18 *
19 * knowledge handling for decision trees
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27 */
28
29 #include "picoos.h"
30 #include "picodbg.h"
31 #include "picobase.h"
32 #include "picoknow.h"
33 #include "picodata.h"
34 #include "picokdt.h"
35
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 #if 0
40 }
41 #endif
42
43
44 /* ************************************************************/
45 /* decision tree */
46 /* ************************************************************/
47
48 /**
49 * @addtogroup picokdt
50 * ---------------------------------------------------\n
51 * <b> Pico KDT support </b>\n
52 * ---------------------------------------------------\n
53 overview extended binary tree file:
54 - dt consists of optional attribute mapping tables and a non-empty
55 tree part
56 - using the attribute mapping tables an attribute value as used
57 throughout the TTS can be mapped to its smaller representation
58 used in the tree
59 - multi-byte values always little endian
60
61 -------------------------------------------------------------------
62 - bin-file, decision tree knowledge base in binary form
63
64 - dt-kb = header inputmaptables outputmaptables tree
65
66
67 - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2
68
    - INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                       the start of kb to the start of input map tables,
                       may not be 0
    - OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from
                       the start of kb to the start of outtables,
                       may not be 0
    - TREEPOS2: two bytes, equals offset in number of bytes from the
                start of kb to the start of the tree
77
78
79 - inputmaptables = maptables
80 - outputmaptables = maptables
81 - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1
82 - maptable = LENTABLE2 TABLETYPE1 ( bytemaptable
83 | wordmaptable
84 | graphinmaptable
85 | bytetovarmaptable )
86 - bytemaptable (in or out, usage varies) = NRBYTES2 {BYTE1}=NRBYTES2
87 - wordmaptable (in or out, usage varies) = NRWORDS2 {WORD2}=NRWORDS2
88 - graphinmaptable (in only) = NRGRAPHS2 {GRAPH1:4}=NRGRAPHS2
89 - bytetovarmaptable (out only) = NRINBYTES2 outvarsearchind
90 outvaroutputs
91 - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2
92 - outvaroutputs = {VARVALID1:}=NRINBYTES2
93
94 - bytemaptable: fixed size, *Map*Fixed \n
95 - wordmaptable: fixed size, *Map*Fixed \n
96 - graphinmaptable: search value is variable size (UTF8 grapheme), \n
97 value to be mapped to is fixed size, one byte \n
98 - bytetovarmaptable: search value is fixed size, one byte, values \n
99 to be mapped to are of variable size (e.g. several \n
100 phones) \n
101
102 - NRMAPTABLES1: one byte representing the number of map tables
103 - LENTABLE2: two bytes, equals offset to the next table (or next
104 part of kb, e.g. tree),
105 if LENTABLE2 = 3, and
106 TABLETYPE1 = EMPTY -> empty table, no mapping to be done
107 - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8)
108 - NRBYTES2: two bytes, number of bytes following in the table (one
109 would be okay, to simplify some implementation also set
110 to 2)
    - BYTE1: one byte, the sequence is used to determine the values
             being mapped to, starting with 0
    - NRWORDS2: two bytes, number of words (two bytes) following in the table
114 - WORD2: two bytes, the sequence is used to determine the values
115 being mapped to, starting with 0
116 - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following
117 in table
118 - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the
119 sequence of graphemes is used to determine the value being
120 mapped to, starting with 0, the length information is
121 encoded in UTF8, no need for extra length info
122 - NRINBYTES2: two bytes, number of single byte IDs the tree can produce
    - OUTVAROFFSET2: two bytes, offset from the start of the
                     outvaroutputs to the start of the following output
                     phone ID group, ie. the first outvaroffset is the
                     offset to the start of the second PHONEID
                     group. Using the previous outvaroffset (or the start
                     of the outvaroutputs) the start and length of the
                     PHONEID group can be determined and we can get the
                     sequence of output values we map the chunk value to
131 - VARVALID1:: one to several bytes, one byte each for an output phone ID
132
133 - tree = treenodeinfos TREEBODYSIZE4 treebody
134 - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields
135 - vfields = {VFIELD1}=NRVFIELDS1
136 - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1
137 - treebody = "cf. code"
138
139 - TREEBODYSIZE4: four bytes, size of treebody in number of bytes
140 - NRVFIELDS1: one byte, number of node properties in the following
141 vector (predefined and fixed sequence of properties)
142 - VFIELD1: number of bits used to represent a node property
143 - NRATTRIBUTES1: one byte, number of attributes (rows) in the
144 following matrix
145 - NRQFIELDS1: one byte, number (columns) of question-dependent node
146 properties per attribute in the following matrix
147 (predefined and fixed sequence of properties)
148 - QFIELD1: number of bits used to represent a question-dependent
149 property in the matrix
150
151
152 - Currently,
153 - NRVFIELDS1 is fixed at 2 for all trees, ie.
154 - vfields = 2 aVFIELD1 bVFIELD1
155 - aVFIELD1: nr of bits for questions
156 - bVFIELD1: nr of bits for decisions
157
158 - NRQFIELDS1 is fixed at 5 for all trees, ie. \n
159 - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n
160 - aQFIELD1: nr of bits for fork count \n
161 - bQFIELD1: nr of bits for start position for subsets \n
162 - cQFIELD1: nr of bits for group size \n
163 - dQFIELD1: nr of bits for offset to reach output \n
164 - eQFIELD1: nr of bits for threshold (if continuous node) \n
165 */
166
167
168 /* ************************************************************/
169 /* decision tree data defines */
170 /* may not be changed with current implementation */
171 /* ************************************************************/
172
/* ---- maptables: a counted sequence of map tables ------------------- */

/* offset of NRMAPTABLES1 (one byte: number of map tables that follow) */
#define PICOKDT_MTSPOS_NRMAPTABLES  0

/* offset of the first byte of the first maptable (for the output map
   tables this is the only table) */
#define PICOKDT_MTPOS_START         1

/* ---- one maptable: offsets relative to its first byte -------------- */

#define PICOKDT_MTPOS_LENTABLE      0  /* LENTABLE2, two bytes            */
#define PICOKDT_MTPOS_TABLETYPE     2  /* TABLETYPE1, one byte            */
#define PICOKDT_MTPOS_NUMBER        3  /* NRBYTES2 / NRWORDS2 / ..., two bytes */
#define PICOKDT_MTPOS_MAPSTART      5  /* first entry after the count     */

/* ---- treenodeinfos: offsets relative to the start of the tree ------ */

#define PICOKDT_NIPOS_NRVFIELDS     0  /* NRVFIELDS1                      */
#define PICOKDT_NIPOS_NRATTS        3  /* NRATTRIBUTES1 (after 2 vfields) */
#define PICOKDT_NIPOS_NRQFIELDS     4  /* NRQFIELDS1                      */

/* ---- fixed number of treenodeinfos fields -------------------------- */

#define PICOKDT_NODEINFO_NRVFIELDS  2  /* vfields per tree                */
#define PICOKDT_NODEINFO_NRQFIELDS  5  /* qfields per attribute           */

/* ---- fixed number of bits used inside the tree body ---------------- */

#define PICOKDT_NODETYPE_NRBITS     2
#define PICOKDT_SUBSETTYPE_NRBITS   2
#define PICOKDT_ISDECIDE_NRBITS     1
198
/* Expected number of input map tables per tree type.
   There is one (possibly empty) input map table per attribute, so
   each value currently has to match the corresponding
   PICOKDT_NRATT_* constant. */
typedef enum {
    PICOKDT_NRINPMT_POSP = 12,
    PICOKDT_NRINPMT_POSD =  7,
    PICOKDT_NRINPMT_G2P  = 16,
    PICOKDT_NRINPMT_PHR  =  8,
    PICOKDT_NRINPMT_ACC  = 13,
    PICOKDT_NRINPMT_PAM  = 60
} kdt_nrinpmaptables_t;
210
/* Expected number of output map tables per tree type.
   Every tree has at least one output map table, which may be empty. */
typedef enum {
    PICOKDT_NROUTMT_POSP = 1,
    PICOKDT_NROUTMT_POSD = 1,
    PICOKDT_NROUTMT_G2P  = 1,
    PICOKDT_NROUTMT_PHR  = 1,
    PICOKDT_NROUTMT_ACC  = 1,
    PICOKDT_NROUTMT_PAM  = 1
} kdt_nroutmaptables_t;
221
/* Map table types, i.e. the possible values of the TABLETYPE1 byte */
typedef enum {
    PICOKDT_MTTYPE_EMPTY     = 0,  /* empty table, no mapping to be done */
    PICOKDT_MTTYPE_BYTE      = 1,  /* bytemaptable                       */
    PICOKDT_MTTYPE_WORD      = 2,  /* wordmaptable                       */
    PICOKDT_MTTYPE_GRAPH     = 3,  /* graphinmaptable (UTF8 graphemes)   */
    PICOKDT_MTTYPE_BYTETOVAR = 4   /* bytetovarmaptable                  */
} kdt_mttype_t;
230
231
232 /* ************************************************************/
233 /* decision tree types and loading */
234 /* ************************************************************/
235 /* object : Dt*KnowledgeBase
236 * shortcut : kdt*
237 * derived from : picoknow_KnowledgeBase
238 */
239
240 /* subobj shared by all decision trees */
241 typedef struct {
242 picokdt_kdttype_t type;
243 picoos_uint8 *inpmaptable;
244 picoos_uint8 *outmaptable;
245 picoos_uint8 *tree;
246 picoos_uint32 beg_offset[128]; /* for efficiency */
247
248 /* tree-internal details for faster processing */
249 picoos_uint8 *vfields;
250 picoos_uint8 *qfields;
251 picoos_uint8 nrattributes;
252 picoos_uint8 *treebody;
253 /*picoos_uint8 nrvfields;*/ /* fix PICOKDT_NODEINFO_NRVFIELDS */
254 /*picoos_uint8 nrqfields;*/ /* fix PICOKDT_NODEINFO_NRQFIELDS */
255
256 /* direct output vector (no output mapping) */
257 picoos_uint8 dset; /* TRUE if class set, FALSE otherwise */
258 picoos_uint16 dclass;
259 } kdt_subobj_t;
260
261 /* subobj specific for each decision tree type */
262 typedef struct {
263 kdt_subobj_t dt;
264 picoos_uint16 invec[PICOKDT_NRATT_POSP]; /* input vector */
265 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
266 } kdtposp_subobj_t;
267
268 typedef struct {
269 kdt_subobj_t dt;
270 picoos_uint16 invec[PICOKDT_NRATT_POSD]; /* input vector */
271 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
272 } kdtposd_subobj_t;
273
274 typedef struct {
275 kdt_subobj_t dt;
276 picoos_uint16 invec[PICOKDT_NRATT_G2P]; /* input vector */
277 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
278 } kdtg2p_subobj_t;
279
280 typedef struct {
281 kdt_subobj_t dt;
282 picoos_uint16 invec[PICOKDT_NRATT_PHR]; /* input vector */
283 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
284 } kdtphr_subobj_t;
285
286 typedef struct {
287 kdt_subobj_t dt;
288 picoos_uint16 invec[PICOKDT_NRATT_ACC]; /* input vector */
289 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
290 } kdtacc_subobj_t;
291
292 typedef struct {
293 kdt_subobj_t dt;
294 picoos_uint16 invec[PICOKDT_NRATT_PAM]; /* input vector */
295 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
296 } kdtpam_subobj_t;
297
298
kdtDtInitialize(register picoknow_KnowledgeBase this,picoos_Common common,kdt_subobj_t * dtp)299 static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this,
300 picoos_Common common,
301 kdt_subobj_t *dtp) {
302 picoos_uint16 inppos;
303 picoos_uint16 outpos;
304 picoos_uint16 treepos;
305 picoos_uint32 curpos = 0, pos;
306 picoos_uint16 lentable;
307 picoos_uint16 i;
308 picoos_uint8 imtnr;
309
310 PICODBG_DEBUG(("start"));
311
312 /* get inmap, outmap, tree offsets */
313 if ((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos))
314 && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos))
315 && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos,
316 &treepos))) {
317
318 /* all pos are mandatory, verify */
319 if (inppos && outpos && treepos) {
320 dtp->inpmaptable = this->base + inppos;
321 dtp->outmaptable = this->base + outpos;
322 dtp->tree = this->base + treepos;
323 /* precalc beg offset table */
324 imtnr=dtp->inpmaptable[0];
325 pos=1;
326 dtp->beg_offset[0] = 1;
327 for (i = 0; i < imtnr; i++) {
328 lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 |
329 dtp->inpmaptable[pos];
330 pos += lentable;
331 dtp->beg_offset[i+1] = pos;
332 }
333 } else {
334 dtp->inpmaptable = NULL;
335 dtp->outmaptable = NULL;
336 dtp->tree = NULL;
337 PICODBG_ERROR(("invalid kb position info"));
338 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
339 NULL, NULL);
340 }
341
342 /* nr of outmaptables is equal 1 for all trees, verify */
343 if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) {
344 PICODBG_ERROR(("wrong number of outmaptables"));
345 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
346 NULL, NULL);
347 }
348
349 /* check if this is an empty table, ie. len == 3 */
350 if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE]
351 == 3)
352 && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE
353 + 1] == 0)) {
354 /* verify that this is supposed to be an empty table and
355 set outmaptable to NULL if so */
356 if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]
357 == PICOKDT_MTTYPE_EMPTY) {
358 dtp->outmaptable = NULL;
359 } else {
360 PICODBG_ERROR(("table length vs. type problem"));
361 return picoos_emRaiseException(common->em,
362 PICO_EXC_FILE_CORRUPT,
363 NULL, NULL);
364 }
365 }
366
367 dtp->vfields = dtp->tree + 1;
368 dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3;
369 dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS];
370 dtp->treebody = dtp->qfields + 4 +
371 (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/
372
373 /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */
374 /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */
375 /* verify that nrvfields ad nrqfields are correct */
376 if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) ||
377 (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) {
378 PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)",
379 dtp->tree[PICOKDT_NIPOS_NRVFIELDS],
380 dtp->tree[PICOKDT_NIPOS_NRQFIELDS]));
381 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
382 NULL, NULL);
383 }
384 dtp->dset = 0;
385 dtp->dclass = 0;
386 PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d",
387 dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable),
388 (dtp->tree - dtp->inpmaptable)));
389 return PICO_OK;
390 } else {
391 PICODBG_ERROR(("problem reading kb in memory"));
392 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
393 NULL, NULL);
394 }
395 }
396
397
kdtDtCheck(register picoknow_KnowledgeBase this,picoos_Common common,kdt_subobj_t * dtp,kdt_nratt_t nratt,kdt_nrinpmaptables_t nrinpmt,kdt_nroutmaptables_t nroutmt,kdt_mttype_t mttype)398 static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this,
399 picoos_Common common,
400 kdt_subobj_t *dtp,
401 kdt_nratt_t nratt,
402 kdt_nrinpmaptables_t nrinpmt,
403 kdt_nroutmaptables_t nroutmt,
404 kdt_mttype_t mttype) {
405 /* check nr attributes */
406 /* check nr inpmaptables */
407 /* check nr outmaptables */
408 /* check outmaptable is word type */
409 if ((nratt != dtp->nrattributes)
410 || (dtp->inpmaptable == NULL)
411 || (dtp->outmaptable == NULL)
412 || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt)
413 || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt)
414 || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE]
415 != mttype)) {
416 PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d",
417 dtp->nrattributes,
418 dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
419 dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
420 dtp->outmaptable[PICOKDT_MTPOS_START +
421 PICOKDT_MTPOS_TABLETYPE]));
422 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
423 NULL, NULL);
424 }
425 return PICO_OK;
426 }
427
428
429
kdtPosPInitialize(register picoknow_KnowledgeBase this,picoos_Common common)430 static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this,
431 picoos_Common common) {
432 pico_status_t status;
433 kdtposp_subobj_t *dtposp;
434 kdt_subobj_t *dt;
435 picoos_uint8 i;
436
437 if (NULL == this || NULL == this->subObj) {
438 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
439 NULL, NULL);
440 }
441 dtposp = (kdtposp_subobj_t *)this->subObj;
442 dt = &(dtposp->dt);
443 dt->type = PICOKDT_KDTTYPE_POSP;
444 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
445 return status;
446 }
447 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP,
448 PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP,
449 PICOKDT_MTTYPE_WORD)) != PICO_OK) {
450 return status;
451 }
452
453 /* init specialized subobj part */
454 for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
455 dtposp->invec[i] = 0;
456 }
457 dtposp->inveclen = 0;
458 PICODBG_DEBUG(("posp tree initialized"));
459 return PICO_OK;
460 }
461
462
kdtPosDInitialize(register picoknow_KnowledgeBase this,picoos_Common common)463 static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this,
464 picoos_Common common) {
465 pico_status_t status;
466 kdtposd_subobj_t *dtposd;
467 kdt_subobj_t *dt;
468 picoos_uint8 i;
469
470 if (NULL == this || NULL == this->subObj) {
471 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
472 NULL, NULL);
473 }
474 dtposd = (kdtposd_subobj_t *)this->subObj;
475 dt = &(dtposd->dt);
476 dt->type = PICOKDT_KDTTYPE_POSD;
477 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
478 return status;
479 }
480 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD,
481 PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD,
482 PICOKDT_MTTYPE_WORD)) != PICO_OK) {
483 return status;
484 }
485
486 /* init spezialized subobj part */
487 for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
488 dtposd->invec[i] = 0;
489 }
490 dtposd->inveclen = 0;
491 PICODBG_DEBUG(("posd tree initialized"));
492 return PICO_OK;
493 }
494
495
kdtG2PInitialize(register picoknow_KnowledgeBase this,picoos_Common common)496 static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this,
497 picoos_Common common) {
498 pico_status_t status;
499 kdtg2p_subobj_t *dtg2p;
500 kdt_subobj_t *dt;
501 picoos_uint8 i;
502
503 if (NULL == this || NULL == this->subObj) {
504 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
505 NULL, NULL);
506 }
507 dtg2p = (kdtg2p_subobj_t *)this->subObj;
508 dt = &(dtg2p->dt);
509 dt->type = PICOKDT_KDTTYPE_G2P;
510 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
511 return status;
512 }
513
514 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P,
515 PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P,
516 PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) {
517 return status;
518 }
519
520 /* init spezialized subobj part */
521 for (i = 0; i < PICOKDT_NRATT_G2P; i++) {
522 dtg2p->invec[i] = 0;
523 }
524 dtg2p->inveclen = 0;
525 PICODBG_DEBUG(("g2p tree initialized"));
526 return PICO_OK;
527 }
528
529
kdtPhrInitialize(register picoknow_KnowledgeBase this,picoos_Common common)530 static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this,
531 picoos_Common common) {
532 pico_status_t status;
533 kdtphr_subobj_t *dtphr;
534 kdt_subobj_t *dt;
535 picoos_uint8 i;
536
537 if (NULL == this || NULL == this->subObj) {
538 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
539 NULL, NULL);
540 }
541 dtphr = (kdtphr_subobj_t *)this->subObj;
542 dt = &(dtphr->dt);
543 dt->type = PICOKDT_KDTTYPE_PHR;
544 if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) {
545 return status;
546 }
547
548 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR,
549 PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR,
550 PICOKDT_MTTYPE_WORD)) != PICO_OK) {
551 return status;
552 }
553
554 /* init spezialized subobj part */
555 for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
556 dtphr->invec[i] = 0;
557 }
558 dtphr->inveclen = 0;
559 PICODBG_DEBUG(("phr tree initialized"));
560 return PICO_OK;
561 }
562
563
kdtAccInitialize(register picoknow_KnowledgeBase this,picoos_Common common)564 static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this,
565 picoos_Common common) {
566 pico_status_t status;
567 kdtacc_subobj_t *dtacc;
568 kdt_subobj_t *dt;
569 picoos_uint8 i;
570
571 if (NULL == this || NULL == this->subObj) {
572 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
573 NULL, NULL);
574 }
575 dtacc = (kdtacc_subobj_t *)this->subObj;
576 dt = &(dtacc->dt);
577 dt->type = PICOKDT_KDTTYPE_ACC;
578 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
579 return status;
580 }
581
582 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC,
583 PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC,
584 PICOKDT_MTTYPE_WORD)) != PICO_OK) {
585 return status;
586 }
587
588 /* init spezialized subobj part */
589 for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
590 dtacc->invec[i] = 0;
591 }
592 dtacc->inveclen = 0;
593 PICODBG_DEBUG(("acc tree initialized"));
594 return PICO_OK;
595 }
596
597
kdtPamInitialize(register picoknow_KnowledgeBase this,picoos_Common common)598 static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this,
599 picoos_Common common) {
600 pico_status_t status;
601 kdtpam_subobj_t *dtpam;
602 kdt_subobj_t *dt;
603 picoos_uint8 i;
604
605 if (NULL == this || NULL == this->subObj) {
606 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
607 NULL, NULL);
608 }
609 dtpam = (kdtpam_subobj_t *)this->subObj;
610 dt = &(dtpam->dt);
611 dt->type = PICOKDT_KDTTYPE_PAM;
612 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) {
613 return status;
614 }
615
616 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM,
617 PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM,
618 PICOKDT_MTTYPE_WORD)) != PICO_OK) {
619 return status;
620 }
621
622 /* init spezialized subobj part */
623 for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
624 dtpam->invec[i] = 0;
625 }
626 dtpam->inveclen = 0;
627 PICODBG_DEBUG(("pam tree initialized"));
628 return PICO_OK;
629 }
630
631
kdtSubObjDeallocate(register picoknow_KnowledgeBase this,picoos_MemoryManager mm)632 static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this,
633 picoos_MemoryManager mm) {
634 if (NULL != this) {
635 picoos_deallocate(mm, (void *) &this->subObj);
636 }
637 return PICO_OK;
638 }
639
640
/* we don't offer a specialized constructor for a *KnowledgeBase but
 * instead a "specializer" of an already existing generic
 * picoknow_KnowledgeBase */
644
picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,picoos_Common common,const picokdt_kdttype_t kdttype)645 pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,
646 picoos_Common common,
647 const picokdt_kdttype_t kdttype) {
648 pico_status_t status;
649
650 if (NULL == this) {
651 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
652 NULL, NULL);
653 }
654 this->subDeallocate = kdtSubObjDeallocate;
655 switch (kdttype) {
656 case PICOKDT_KDTTYPE_POSP:
657 this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t));
658 if (NULL == this->subObj) {
659 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
660 NULL, NULL);
661 }
662 status = kdtPosPInitialize(this, common);
663 break;
664 case PICOKDT_KDTTYPE_POSD:
665 this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t));
666 if (NULL == this->subObj) {
667 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
668 NULL, NULL);
669 }
670 status = kdtPosDInitialize(this, common);
671 break;
672 case PICOKDT_KDTTYPE_G2P:
673 this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t));
674 if (NULL == this->subObj) {
675 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
676 NULL, NULL);
677 }
678 status = kdtG2PInitialize(this, common);
679 break;
680 case PICOKDT_KDTTYPE_PHR:
681 this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t));
682 if (NULL == this->subObj) {
683 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
684 NULL, NULL);
685 }
686 status = kdtPhrInitialize(this, common);
687 break;
688 case PICOKDT_KDTTYPE_ACC:
689 this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t));
690 if (NULL == this->subObj) {
691 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
692 NULL, NULL);
693 }
694 status = kdtAccInitialize(this, common);
695 break;
696 case PICOKDT_KDTTYPE_PAM:
697 this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t));
698 if (NULL == this->subObj) {
699 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
700 NULL, NULL);
701 }
702 status = kdtPamInitialize(this, common);
703 break;
704 default:
705 return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
706 NULL, NULL);
707 }
708
709 if (status != PICO_OK) {
710 picoos_deallocate(common->mm, (void *) &this->subObj);
711 return picoos_emRaiseException(common->em, status, NULL, NULL);
712 }
713 return PICO_OK;
714 }
715
716
717 /* ************************************************************/
718 /* decision tree getDt* */
719 /* ************************************************************/
720
picokdt_getDtPosP(picoknow_KnowledgeBase this)721 picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) {
722 return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj));
723 }
724
picokdt_getDtPosD(picoknow_KnowledgeBase this)725 picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) {
726 return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj));
727 }
728
picokdt_getDtG2P(picoknow_KnowledgeBase this)729 picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this) {
730 return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj));
731 }
732
picokdt_getDtPHR(picoknow_KnowledgeBase this)733 picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this) {
734 return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj));
735 }
736
picokdt_getDtACC(picoknow_KnowledgeBase this)737 picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this) {
738 return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj));
739 }
740
picokdt_getDtPAM(picoknow_KnowledgeBase this)741 picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this) {
742 return ((NULL == this) ? NULL : ((picokdt_DtPAM) this->subObj));
743 }
744
745
746
747 /* ************************************************************/
748 /* decision tree support functions, tree */
749 /* ************************************************************/
750
751
/* indices into the vfields vector; each entry holds a number of bits */
typedef enum {
    eQuestion = 0,   /* #bits used to identify a question */
    eDecide   = 1    /* #bits used to identify a decision */
} kdt_vfields_ind_t;
756
/* indices into one row (one attribute) of the qfields matrix; each
   entry holds a number of bits */
typedef enum {
    eForkCount = 0,  /* #bits for the number of forks */
    eBitNo     = 1,  /* #bits for the index of the 1st element */
    eBitCount  = 2,  /* #bits for the size of the group */
    eJump      = 3,  /* #bits for the offset to reach the output node */
    eCut       = 4   /* continuous node: #bits for the threshold checked */
} kdt_qfields_ind_t;
764
/* node type codes */
typedef enum {
    eNTerminal   = 0,
    eNBinary     = 1,
    eNContinuous = 2,
    eNDiscrete   = 3
} kdt_nodetypes_t;
771
/* subset type codes */
typedef enum {
    eOneValue       = 0,
    eTwoValues      = 1,
    eWithoutBitMask = 2,
    eBitMask        = 3
} kdt_subsettypes_t;
778
779
780 /* Name : kdt_jump
781 Function: maps the iJump offset to byte + bit coordinates
782 Input : iJump absolute bit offset (0..(nr-bytes-treebody)*8)
783 Output : iByteNo the first byte containing the bits to extract
784 (0..(nr-bytes-treebody))
785 iBitNo the first bit to be extracted (0..7)
786 Returns : void
787 Notes : updates the iByteNo + iBitNo fields
788 */
kdt_jump(const picoos_uint32 iJump,picoos_uint32 * iByteNo,picoos_int8 * iBitNo)789 static void kdt_jump(const picoos_uint32 iJump,
790 picoos_uint32 *iByteNo,
791 picoos_int8 *iBitNo) {
792 picoos_uint32 iByteSize;
793
794 iByteSize = (iJump / 8 );
795 *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo);
796 *iByteNo += iByteSize;
797 if (*iBitNo >= 8) {
798 (*iByteNo)++;
799 *iBitNo = 15 - *iBitNo;
800 } else {
801 *iBitNo = 7 - *iBitNo;
802 }
803 }
804
805
806 /* replaced inline for speedup */
/* Name    : kdtIsVal
   Function: Returns the binary value of the bit pointed to by iByteNo, iBitNo
   Input   : iByteNo   offset to the byte containing the bits to extract
                       (0..sizeof(treebody))
             iBitNo    offset to the first bit to be extracted (0..7)
   Returns : 0/1 depending on the bit pointed to
*/
814 /*
815 static picoos_uint8 kdtIsVal(register kdt_subobj_t *this,
816 picoos_uint32 iByteNo,
817 picoos_int8 iBitNo) {
818 return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0);
819 }
820 */
821
822
823 /* @todo : consider replacing inline for speedup */
824
825 /* Name : kdtGetQFieldsVal (was: m_QuestDependentFields)
826 Function: gets a byte from qfields
827 Input : this handle to a dt subobj
828 attind index of the attribute
829 qind index of the byte to be read
830 Returns : the requested byte
831 Notes : check that attind < this->nrattributes needed before calling
832 this function!
833 */
kdtGetQFieldsVal(register kdt_subobj_t * this,const picoos_uint8 attind,const kdt_qfields_ind_t qind)834 static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this,
835 const picoos_uint8 attind,
836 const kdt_qfields_ind_t qind) {
837 /* check of qind done in initialize and (for some compilers) with typing */
838 /* check of attind needed before calling this function */
839 return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind];
840 }
841
842
/* Name    : kdtGetShiftVal (was: get_shift_value)
   Function: returns the (treebody) value pointed to by iByteNo, iBitNo,
             and with size iSize
   Input   : this      reference to the processing unit struct
             iSize     number of bits to be extracted (0..N)
             iByteNo   offset to the byte containing the bits to extract
                       (0..sizeof(treebody))
             iBitNo    offset to the first bit to be extracted (0..7)
   Returns : the value requested (if size==0 --> 0 is returned)
*/
853 /*
854 static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this,
855 const picoos_int16 iSize,
856 picoos_uint32 *iByteNo,
857 picoos_int8 *iBitNo) {
858 picoos_uint32 iVal;
859 picoos_int16 i;
860
861 iVal = 0;
862 for (i = iSize-1; i >= 0; i--) {
863 if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
864 iVal |= ( (1) << i );
865 }
866 (*iBitNo)--;
867 if (*iBitNo < 0) {
868 *iBitNo = 7;
869 (*iByteNo)++;
870 }
871 }
872 return iVal;
873 }
874 */
875 /* refactor */
/* Reads iSize bits from the tree body, starting at bit *iBitNo of byte
   *iByteNo and moving from bit 7 down to bit 0 of each byte, then on
   to the next byte. The first bit read becomes the most significant
   bit of the result. *iByteNo and *iBitNo are advanced to the first
   bit after the field. Fields of fewer than 4 bits are read bit by
   bit, longer ones bytewise. There is no check that the position
   stays within the tree body. */
static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this,
        const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo)
{
    /* keepLow[n] keeps bits n..0 of a byte (n = 0..6) */
    static const picoos_uint8 keepLow[7] = {
        0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f
    };
    /* keepHigh[n] keeps the n topmost bits of a byte (n = 1..7) */
    static const picoos_uint8 keepHigh[8] = {
        0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
    };
    picoos_uint32 result, srcByte;
    picoos_int16 n, startBit, pending, take;
    picoos_uint8 chunk;

    if (iSize < 4) {
        /* short field: shift in one bit at a time */
        result = 0;
        for (n = 0; n < iSize; n++) {
            result <<= 1;
            /* no check that *iByteNo is within valid treebody range */
            if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) {
                result |= 1;
            }
            (*iBitNo)--;
            if (*iBitNo < 0) {
                *iBitNo = 7;
                (*iByteNo)++;
            }
        }
        return result;
    }

    /* remember where the field starts, then move the caller's position
       to the first bit after the field */
    srcByte = *iByteNo;
    startBit = *iBitNo;
    *iBitNo = startBit - iSize;
    while (*iBitNo < 0) {
        *iBitNo += 8;
        (*iByteNo)++;
    }

    /* first byte: only bits startBit..0 belong to the field */
    chunk = this->treebody[srcByte++];
    if ((startBit >= 0) && (startBit < 7)) {
        chunk &= keepLow[startBit];
    }
    pending = iSize - (startBit + 1);
    if (pending < 0) {
        /* field ends inside the first byte: drop the bits below it */
        chunk >>= -pending;
    }
    result = chunk;

    /* following bytes: whole bytes, then the top bits of the last one */
    while (pending > 0) {
        take = (pending >= 8) ? 8 : pending;
        result <<= take;
        chunk = this->treebody[srcByte++];
        if (take < 8) {
            chunk = (chunk & keepHigh[take]) >> (8 - take);
        }
        result |= chunk;
        pending -= take;
    }
    return result;
}
985
986
987 /* Name : kdtAskTree
988 Function: Tree Traversal routine
989 Input : iByteNo ofsset to the first byte containing the bits
990 to extract (0..sizeof(treebody))
991 iBitNo ofsset to the first bit to be extracted (0..7)
992 Returns : >0 continue, no solution yet found
993 =0 solution found
994 <0 error, no solution found
995 Notes :
996 */
kdtAskTree(register kdt_subobj_t * this,picoos_uint16 * invec,const kdt_nratt_t invecmax,picoos_uint32 * iByteNo,picoos_int8 * iBitNo)997 static picoos_int8 kdtAskTree(register kdt_subobj_t *this,
998 picoos_uint16 *invec,
999 const kdt_nratt_t invecmax,
1000 picoos_uint32 *iByteNo,
1001 picoos_int8 *iBitNo) {
1002 picoos_uint32 iNodeType;
1003 picoos_uint8 iQuestion;
1004 picoos_int32 iVal;
1005 picoos_int32 iForks;
1006 picoos_int32 iID;
1007
1008 picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision;
1009 picoos_int32 i;
1010 picoos_char iIsDecide;
1011
1012 PICODBG_TRACE(("start"));
1013
1014 /* get node type, value should be in kdt_nodetype_t range */
1015 iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo);
1016 PICODBG_TRACE(("iNodeType: %d", iNodeType));
1017
1018 /* get attribute to be used in question, check if in range, and get val */
1019 /* check of vfields argument done in initialize */
1020 iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo);
1021 if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) {
1022 iVal = invec[iQuestion];
1023 } else {
1024 this->dset = FALSE;
1025 PICODBG_TRACE(("invalid question"));
1026 return -1; /* iQuestion invalid */
1027 }
1028 iForks = 0;
1029 iID = -1;
1030 PICODBG_TRACE(("iQuestion: %d", iQuestion));
1031
1032 switch (iNodeType) {
1033 case eNBinary: {
1034 iForks = 2;
1035 iID = iVal;
1036 break;
1037 }
1038 case eNContinuous: {
1039 iForks = 2;
1040 iID = 1;
1041 iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut),
1042 iByteNo, iBitNo); /*read the threshold*/
1043 if (iVal <= iCut) {
1044 iID = 0;
1045 }
1046 break;
1047 }
1048 case eNDiscrete: {
1049 iForks =
1050 kdtGetShiftVal(this,
1051 kdtGetQFieldsVal(this, iQuestion, eForkCount),
1052 iByteNo, iBitNo);
1053
1054 for (i = 0; i < iForks-1; i++) {
1055 iSubsetType =
1056 kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS,
1057 iByteNo, iBitNo);
1058
1059 switch (iSubsetType) {
1060 case eOneValue: {
1061 if (iID > -1) {
1062 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
1063 iByteNo, iBitNo);
1064 break;
1065 }
1066 iBitPos =
1067 kdtGetShiftVal(this,
1068 kdtGetQFieldsVal(this, iQuestion,
1069 eBitNo),
1070 iByteNo, iBitNo);
1071 if (iVal == iBitPos) {
1072 iID = i;
1073 }
1074 break;
1075 }
1076 case eTwoValues: {
1077 if (iID > -1) {
1078 kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
1079 kdtGetQFieldsVal(this, iQuestion, eBitCount)),
1080 iByteNo, iBitNo);
1081 break;
1082 }
1083
1084 iBitPos =
1085 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1086 eBitNo),
1087 iByteNo, iBitNo);
1088 iBitCount =
1089 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1090 eBitCount),
1091 iByteNo, iBitNo);
1092 if ((iVal == iBitPos) || (iVal == iBitCount)) {
1093 iID = i;
1094 }
1095 break;
1096 }
1097 case eWithoutBitMask: {
1098 if (iID > -1) {
1099 kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
1100 kdtGetQFieldsVal(this, iQuestion, eBitCount)),
1101 iByteNo, iBitNo);
1102 break;
1103 }
1104
1105 iBitPos =
1106 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1107 eBitNo),
1108 iByteNo, iBitNo);
1109 iBitCount =
1110 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
1111 eBitCount),
1112 iByteNo, iBitNo);
1113 if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
1114 iID = i;
1115 }
1116 break;
1117 }
1118 case eBitMask: {
1119 iBitPos = 0;
1120 if (iID > -1) {
1121 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
1122 iByteNo, iBitNo);
1123 } else {
1124 iBitPos =
1125 kdtGetShiftVal(this,
1126 kdtGetQFieldsVal(this, iQuestion,
1127 eBitNo),
1128 iByteNo, iBitNo);
1129 }
1130
1131 iBitCount =
1132 kdtGetShiftVal(this,
1133 kdtGetQFieldsVal(this, iQuestion,
1134 eBitCount),
1135 iByteNo, iBitNo);
1136 if (iID > -1) {
1137 kdt_jump(iBitCount, iByteNo, iBitNo);
1138 break;
1139 }
1140
1141 if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
1142 iPos = iVal - iBitPos;
1143 kdt_jump((iVal - iBitPos), iByteNo, iBitNo);
1144 /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/
1145 if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
1146 iID = i;
1147 }
1148 kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo);
1149 } else {
1150 kdt_jump(iBitCount, iByteNo, iBitNo);
1151 }
1152 break;
1153 }/*end case eBitMask*/
1154 }/*end switch (iSubsetType)*/
1155 }/*end for ( i = 0; i < iForks-1; i++ ) */
1156
1157 /*default tree branch*/
1158 if (-1 == iID) {
1159 iID = iForks-1;
1160 }
1161 break;
1162 }/*end case eNDiscrete*/
1163 }/*end switch (iNodeType)*/
1164
1165 for (i = 0; i < iForks; i++) {
1166 iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo);
1167
1168 PICODBG_TRACE(("doing forks: %d", i));
1169
1170 if (!iIsDecide) {
1171 if (iID == i) {
1172 iJump =
1173 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump),
1174 iByteNo, iBitNo);
1175 kdt_jump(iJump, iByteNo, iBitNo);
1176 this->dset = FALSE;
1177 return 1; /* to be continued, no solution yet found */
1178 } else {
1179 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump),
1180 iByteNo, iBitNo);
1181 }
1182 } else {
1183 if (iID == i) {
1184 /* check of vfields argument done in initialize */
1185 iDecision = kdtGetShiftVal(this, this->vfields[eDecide],
1186 iByteNo, iBitNo);
1187 this->dclass = iDecision;
1188 this->dset = TRUE;
1189 return 0; /* solution found */
1190 } else {
1191 /* check of vfields argument done in initialize */
1192 kdt_jump(this->vfields[eDecide], iByteNo, iBitNo);
1193 }
1194 }/*end if (!iIsDecide)*/
1195 }/*end for (i = 0; i < iForks; i++ )*/
1196
1197 this->dset = FALSE;
1198 PICODBG_TRACE(("problem determining class"));
1199 return -1; /* solution not found, problem determining a class */
1200 }
1201
1202
1203
1204 /* ************************************************************/
1205 /* decision tree support functions, mappings */
1206 /* ************************************************************/
1207
1208
1209 /* size==1 -> MapInByte, size==2 -> MapInWord,
1210 size determined from table type contained in kb.
1211 if the inmaptable is empty, outval = inval */
1212
kdtMapInFixed(const kdt_subobj_t * dt,const picoos_uint8 imtnr,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1213 static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt,
1214 const picoos_uint8 imtnr,
1215 const picoos_uint16 inval,
1216 picoos_uint16 *outval,
1217 picoos_uint16 *outfallbackval) {
1218 picoos_uint8 size;
1219 picoos_uint32 pos;
1220 picoos_uint16 lentable;
1221 picoos_uint16 posbound;
1222 picoos_uint16 i;
1223
1224 *outval = 0;
1225 *outfallbackval = 0;
1226
1227 size = 0;
1228 pos = 0;
1229
1230 /* check what can be checked */
1231 if (imtnr >= dt->inpmaptable[pos++]) { /* outside tablenr range? */
1232 PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d",
1233 dt->inpmaptable[pos-1], imtnr));
1234 return FALSE;
1235 }
1236
1237 /* go forward to the needed tablenr */
1238 if (imtnr > 0) {
1239 pos = dt->beg_offset[imtnr];
1240 }
1241
1242 /* get length */
1243 lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1244 dt->inpmaptable[pos];
1245 posbound = pos + lentable;
1246 pos += 2;
1247
1248 /* check type of table and set size */
1249 if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) {
1250 /* empty table no mapping needed */
1251 PICODBG_TRACE(("empty table: %d", imtnr));
1252 *outval = inval;
1253 return TRUE;
1254 } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1255 size = 1;
1256 } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1257 size = 2;
1258 } else {
1259 /* wrong table type */
1260 PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos]));
1261 return FALSE;
1262 }
1263 pos++;
1264
1265 /* set fallback value in case of failed mapping, and set upper bound pos */
1266 *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1267 dt->inpmaptable[pos];
1268 pos += 2;
1269
1270 /* size must be 1 or 2 here, keep 'redundant' so save time */
1271 if (size == 1) {
1272 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1273 if (inval == dt->inpmaptable[pos]) {
1274 *outval = i;
1275 PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval));
1276 return TRUE;
1277 }
1278 pos++;
1279 }
1280 } else if (size == 2) {
1281 posbound--;
1282 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1283 if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
1284 dt->inpmaptable[pos])) {
1285 *outval = i;
1286 PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval));
1287 return TRUE;
1288 }
1289 pos += 2;
1290 }
1291 } else {
1292 /* impossible size */
1293 PICODBG_ERROR(("wrong size %d", size));
1294 return FALSE;
1295 }
1296
1297 PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1298 return FALSE;
1299 }
1300
1301
/**
 * Map one UTF-8 grapheme to its tree-internal value using input map table
 * number imtnr, which must be of type PICOKDT_MTTYPE_GRAPH.
 *
 * The table is a sequence of UTF-8 characters that is searched
 * sequentially; the mapped value is the index of the first character equal
 * to the one starting at inval[0]. The 16-bit field following the table
 * type is used both as the maximum number of characters searched and as
 * the fallback value reported in *outfallbackval.
 *
 * @param dt              decision tree sub-object holding inpmaptable
 * @param imtnr           number of the input map table to use
 * @param inval           UTF-8 character to be mapped
 * @param invalmaxlen     number of bytes accessible at inval
 * @param outval          set to the mapped value on success, untouched
 *                        otherwise
 * @param outfallbackval  set to the fallback value of the table; 0 if the
 *                        table header was not reached
 * @return TRUE if the grapheme was found in the table, FALSE otherwise
 */
static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt,
                                  const picoos_uint8 imtnr,
                                  const picoos_uint8 *inval,
                                  const picoos_uint8 invalmaxlen,
                                  picoos_uint16 *outval,
                                  picoos_uint16 *outfallbackval) {
    picoos_uint8 ilen;
    picoos_uint8 tlen;
    picoos_uint8 cont;
    picoos_uint32 pos;
    picoos_uint16 lentable;
    /* same width as pos: the sum pos + lentable may exceed 16 bit */
    picoos_uint32 posbound;
    picoos_uint16 i;
    picoos_uint8 j;

    *outfallbackval = 0;

    /* defined value for the error trace below in case the check fails
       before the utf8 length has been determined */
    ilen = 0;
    pos = 0;
    /* check what can be checked */
    if ((imtnr >= dt->inpmaptable[pos++]) ||   /* outside tablenr range? */
        (invalmaxlen == 0) ||                  /* too short? */
        ((ilen = picobase_det_utf8_length(inval[0])) == 0) ||   /* invalid? */
        (ilen > invalmaxlen)) {                /* not accessible? */
        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, "
                       "ilen: %d",
                       dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen));
        return FALSE;
    }

    /* go forward to the needed tablenr, hopping from length field to
       length field */
    for (i = 0; i < imtnr; i++) {
        lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
            dt->inpmaptable[pos];
        pos += lentable;
    }

    /* get length and check type of inpmaptable */
    lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    posbound = pos + lentable;
    pos += 2;

#if defined(PICO_DEBUG)
    if (1) {
        picoos_uint32 id;
        PICODBG_TRACE(("imtnr %d", imtnr));
        for (id = pos-2; id < posbound; id++) {
            PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2),
                           dt->inpmaptable[id], dt->inpmaptable[id]));
        }
    }
#endif

    /* check type of table */
    if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) {
        /* empty table does not make sense for graph */
        /* wrong table type */
        PICODBG_ERROR(("wrong table type"));
        return FALSE;
    }
    pos++;

    /* set fallback value in case of failed mapping, and set upper bound pos */
    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    pos += 2;

    /* sequential search */
    for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
        tlen = picobase_det_utf8_length(dt->inpmaptable[pos]);
        if ((pos + tlen) > posbound) {
            PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d",
                           posbound, pos, tlen));
            return FALSE;
        }
        /* only characters of equal byte length can match */
        if (ilen == tlen) {
            cont = TRUE;
            for (j = 0; cont && (j < ilen); j++) {
                if (dt->inpmaptable[pos + j] != inval[j]) {
                    cont = FALSE;
                }
            }
            if (cont && (j == ilen)) {    /* match found */
                *outval = i;
                PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d",
                               posbound, pos, i, tlen));
                return TRUE;
            }
        }
        pos += tlen;
    }
    PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d",
                   imtnr, posbound, pos, i, *outfallbackval));
    return FALSE;
}
1397
1398
1399 /* size==1 -> MapOutByte, size==2 -> MapOutWord */
kdtMapOutFixed(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint16 * outval)1400 static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt,
1401 const picoos_uint16 inval,
1402 picoos_uint16 *outval) {
1403 picoos_uint8 size;
1404 picoos_uint16 nr;
1405
1406 /* no check of lentable vs. nr in initialize done */
1407
1408 size = 0;
1409
1410 /* type */
1411 nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE];
1412
1413 /* check type of table and set size */
1414 if (nr == PICOKDT_MTTYPE_EMPTY) {
1415 /* empty table no mapping needed */
1416 PICODBG_TRACE(("empty table"));
1417 *outval = inval;
1418 return TRUE;
1419 } else if (nr == PICOKDT_MTTYPE_BYTE) {
1420 size = 1;
1421 } else if (nr == PICOKDT_MTTYPE_WORD) {
1422 size = 2;
1423 } else {
1424 /* wrong table type */
1425 PICODBG_ERROR(("wrong table type %d", nr));
1426 return FALSE;
1427 }
1428
1429 /* number of mapvalues */
1430 nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1431 PICOKDT_MTPOS_NUMBER + 1])) << 8
1432 | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER];
1433
1434 if (inval < nr) {
1435 if (size == 1) {
1436 *outval = dt->outmaptable[PICOKDT_MTPOS_START +
1437 PICOKDT_MTPOS_MAPSTART + (size * inval)];
1438 } else {
1439 *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
1440 PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8
1441 | dt->outmaptable[PICOKDT_MTPOS_START +
1442 PICOKDT_MTPOS_MAPSTART + (size * inval)];
1443 }
1444 return TRUE;
1445 } else {
1446 *outval = 0;
1447 return FALSE;
1448 }
1449 }
1450
1451
1452 /* size==1 -> ReverseMapOutByte, size==2 -> ReverseMapOutWord */
1453 /* outmaptable also used to map from decoded tree output domain to
1454 direct tree output domain */
kdtReverseMapOutFixed(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1455 static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt,
1456 const picoos_uint16 inval,
1457 picoos_uint16 *outval,
1458 picoos_uint16 *outfallbackval) {
1459 picoos_uint8 size;
1460 picoos_uint32 pos;
1461 picoos_uint16 lentable;
1462 picoos_uint16 posbound;
1463 picoos_uint16 i;
1464
1465 /* no check of lentable vs. nr in initialize done */
1466
1467 size = 0;
1468 pos = 0;
1469 *outval = 0;
1470 *outfallbackval = 0;
1471
1472 if (dt->outmaptable == NULL) {
1473 /* empty table no mapping needed */
1474 PICODBG_TRACE(("empty table"));
1475 *outval = inval;
1476 return TRUE;
1477 }
1478
1479 /* check what can be checked */
1480 if (dt->outmaptable[pos++] != 1) { /* only one omt possible */
1481 PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1]));
1482 return FALSE;
1483 }
1484
1485 /* get length */
1486 lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1487 dt->outmaptable[pos];
1488 posbound = pos + lentable;
1489 pos += 2;
1490
1491 /* check type of table and set size */
1492 /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in
1493 ...Initialize the omt is set to NULL if not existing, checked
1494 above */
1495
1496 if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) {
1497 size = 1;
1498 } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) {
1499 size = 2;
1500 } else {
1501 /* wrong table type */
1502 PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1503 return FALSE;
1504 }
1505 pos++;
1506
1507 /* set fallback value in case of failed mapping, and set upper bound pos */
1508 *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1509 dt->outmaptable[pos];
1510 pos += 2;
1511
1512 /* size must be 1 or 2 here, keep 'redundant' so save time */
1513 if (size == 1) {
1514 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1515 if (inval == dt->outmaptable[pos]) {
1516 *outval = i;
1517 PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval));
1518 return TRUE;
1519 }
1520 pos++;
1521 }
1522 } else if (size == 2) {
1523 posbound--;
1524 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
1525 if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1526 dt->outmaptable[pos])) {
1527 *outval = i;
1528 PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval));
1529 return TRUE;
1530 }
1531 pos += 2;
1532 }
1533 } else {
1534 /* impossible size */
1535 PICODBG_ERROR(("wrong size %d", size));
1536 return FALSE;
1537 }
1538
1539 PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
1540 return FALSE;
1541 }
1542
1543
picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,const picoos_uint16 inval,picoos_uint16 * outval,picoos_uint16 * outfallbackval)1544 picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,
1545 const picoos_uint16 inval,
1546 picoos_uint16 *outval,
1547 picoos_uint16 *outfallbackval) {
1548
1549 kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this;
1550 kdt_subobj_t * dt = &(dtposd->dt);
1551 return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval);
1552 }
1553
1554 /* not yet impl. size==1 -> MapOutByteToVar,
1555 fix: size==2 -> MapOutWordToVar */
kdtMapOutVar(const kdt_subobj_t * dt,const picoos_uint16 inval,picoos_uint8 * nr,picoos_uint16 * outval,const picoos_uint16 outvalmaxlen)1556 static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt,
1557 const picoos_uint16 inval,
1558 picoos_uint8 *nr,
1559 picoos_uint16 *outval,
1560 const picoos_uint16 outvalmaxlen) {
1561 picoos_uint16 pos;
1562 picoos_uint16 off2ind;
1563 picoos_uint16 lentable;
1564 picoos_uint16 nrinbytes;
1565 picoos_uint8 size;
1566 picoos_uint16 offset1;
1567 picoos_uint16 i;
1568
1569 if (dt->outmaptable == NULL) {
1570 /* empty table not possible */
1571 PICODBG_ERROR(("no table found"));
1572 return FALSE;
1573 }
1574
1575 /* nr of tables == 1 already checked in *Initialize, no need here, go
1576 directly to position 1 */
1577 pos = 1;
1578
1579 /* get length of table */
1580 lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 |
1581 dt->outmaptable[pos]);
1582 pos += 2;
1583
1584 /* check table type */
1585 if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) {
1586 /* wrong table type */
1587 PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos]));
1588 return FALSE;
1589 }
1590 size = 2;
1591 pos++;
1592
1593 /* get nr of ele in maptable (= nr of possible invals) */
1594 nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 |
1595 dt->outmaptable[pos]);
1596 pos += 2;
1597
1598 /* check what's checkable */
1599 if (nrinbytes == 0) {
1600 PICODBG_ERROR(("table with length zero"));
1601 return FALSE;
1602 } else if (inval >= nrinbytes) {
1603 PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes));
1604 return FALSE;
1605 }
1606
1607 PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval,
1608 lentable, nrinbytes, pos));
1609
1610 /* set off2ind to the position of the start of offset2-val */
1611 /* offset2 points to start of next ele */
1612 off2ind = pos + (size*inval);
1613
1614 /* get number of output values, offset2 - offset1 */
1615 if (inval == 0) {
1616 offset1 = 0;
1617 } else {
1618 offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 |
1619 dt->outmaptable[off2ind - 2]);
1620 }
1621 *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 |
1622 dt->outmaptable[off2ind]) - offset1;
1623
1624 PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos));
1625
1626 /* set pos to position of 1st value being mapped to */
1627 pos += (size * nrinbytes) + offset1;
1628
1629 if ((pos + *nr - 1) > lentable) {
1630 /* outside table, should not happen */
1631 PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d",
1632 pos, *nr, lentable));
1633 return FALSE;
1634 }
1635 if (*nr > outvalmaxlen) {
1636 /* not enough space in outval */
1637 PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen));
1638 return FALSE;
1639 }
1640
1641 /* finally, copy outmap result to outval */
1642 for (i = 0; i < *nr; i++) {
1643 outval[i] = dt->outmaptable[pos++];
1644 }
1645 return TRUE;
1646 }
1647
1648
1649
1650 /* ************************************************************/
1651 /* decision tree POS prediction (PosP) functions */
1652 /* ************************************************************/
1653
/* number of prefix and suffix graphemes used to construct the input vector;
   the grapheme attributes are the prefix followed by the suffix */
#define KDT_POSP_NRGRAPHPREFATT   4
#define KDT_POSP_NRGRAPHSUFFATT   6
#define KDT_POSP_NRGRAPHATT \
    (KDT_POSP_NRGRAPHPREFATT + KDT_POSP_NRGRAPHSUFFATT)

/* positions of specgraph and nrgraphs attributes, directly following the
   grapheme attributes */
#define KDT_POSP_SPECGRAPHATTPOS  (KDT_POSP_NRGRAPHATT)
#define KDT_POSP_NRGRAPHSATTPOS   (KDT_POSP_NRGRAPHATT + 1)
1662
1663
1664 /* construct PosP input vector
1665
1666 PosP invec: 12 elements
1667
1668 prefix 0-3 prefix graphemes (encoded using tree inpmaptable 0-3)
1669 suffix 4-9 suffix graphemes (encoded using tree inpmaptable 4-9)
1670 isspecchar 10 is a special grapheme (e.g. hyphen) inside the word (0/1)?
1671 nr-utf-graphs 11 number of graphemes (ie. UTF8 chars)
1672
1673 if there are less than 10 graphemes, each grapheme is used only
1674 once, with the suffix having higher priority, ie. elements 0-9 are
1675 filled as follows:
1676
1677 #graph
1678 1 0 0 0 0 0 0 0 0 0 1
1679 2 0 0 0 0 0 0 0 0 1 2
1680 3 0 0 0 0 0 0 0 1 2 3
1681 4 0 0 0 0 0 0 1 2 3 4
1682 5 0 0 0 0 0 1 2 3 4 5
1683 6 0 0 0 0 1 2 3 4 5 6
1684 7 1 0 0 0 2 3 4 5 6 7
1685 8 1 2 0 0 3 4 5 6 7 8
1686 9 1 2 3 0 4 5 6 7 8 9
1687 10 1 2 3 4 5 6 7 8 9 10
1688 11 1 2 3 4 6 7 8 9 10 11
1689 ...
1690
1691 1-6: Fill chbuf
1692 7-10: front to invec 1st part, remove front, add rear
1693 >10: remove front, add rear
1694 no more graph ->
1695 while chbuflen>0:
1696 add rear to the last empty slot in 2nd part of invec, remove rear
1697 */
1698
1699
picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,const picoos_uint8 * graph,const picoos_uint16 graphlen,const picoos_uint8 specgraphflag)1700 picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,
1701 const picoos_uint8 *graph,
1702 const picoos_uint16 graphlen,
1703 const picoos_uint8 specgraphflag) {
1704 kdtposp_subobj_t *dtposp;
1705
1706 /* utf8 circular char buffer, used as restricted input deque */
1707 /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */
1708 /* max of UTF8_MAXLEN bytes per utf8 char */
1709 picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN];
1710 picoos_uint8 chbrear; /* next free pos */
1711 picoos_uint8 chbfront; /* next read pos */
1712 picoos_uint8 chblen; /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */
1713
1714 picoos_uint16 poscg; /* position of current graph (= utf8 char) */
1715 picoos_uint16 lencg = 0; /* length of current grapheme */
1716 picoos_uint16 nrutfg; /* number of utf graphemes */
1717 picoos_uint8 invecpos; /* next element to add in invec */
1718 picoos_uint16 fallback; /* fallback value for failed graph encodings */
1719 picoos_uint8 i;
1720
1721 dtposp = (kdtposp_subobj_t *)this;
1722 chbrear = 0;
1723 chbfront = 0;
1724 chblen = 0;
1725 poscg = 0;
1726 nrutfg = 0;
1727 invecpos = 0;
1728
1729 PICODBG_DEBUG(("graphlen %d", graphlen));
1730
1731 /* not needed, since all elements are set
1732 for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
1733 dtposp->invec[i] = '\x63';
1734 }
1735 */
1736
1737 dtposp->inveclen = 0;
1738
1739 while ((poscg < graphlen) &&
1740 ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) {
1741 if (chblen >= KDT_POSP_NRGRAPHSUFFATT) { /* chbuf full */
1742 if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */
1743 /* att-encode front utf graph and add in invec */
1744 if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1745 chbuf[chbfront], PICOBASE_UTF8_MAXLEN,
1746 &(dtposp->invec[invecpos]),
1747 &fallback)) {
1748 if (fallback) {
1749 dtposp->invec[invecpos] = fallback;
1750 } else {
1751 return FALSE;
1752 }
1753 }
1754 invecpos++;
1755 }
1756 /* remove front utf graph */
1757 chbfront++;
1758 chbfront %= KDT_POSP_NRGRAPHSUFFATT;
1759 chblen--;
1760 }
1761 /* add current utf graph to chbuf */
1762 for (i=0; i<lencg; i++) {
1763 chbuf[chbrear][i] = graph[poscg++];
1764 }
1765 if (i < PICOBASE_UTF8_MAXLEN) {
1766 chbuf[chbrear][i] = '\0';
1767 }
1768 chbrear++;
1769 chbrear %= KDT_POSP_NRGRAPHSUFFATT;
1770 chblen++;
1771 /* increase utf graph count */
1772 nrutfg++;
1773 }
1774
1775 if ((lencg == 0) || (chblen == 0)) {
1776 return FALSE;
1777 } else if (chblen > 0) {
1778
1779 while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */
1780 if (!kdtMapInGraph(&(dtposp->dt), invecpos,
1781 PICOKDT_OUTSIDEGRAPH_DEFSTR,
1782 PICOKDT_OUTSIDEGRAPH_DEFLEN,
1783 &(dtposp->invec[invecpos]), &fallback)) {
1784 if (fallback) {
1785 dtposp->invec[invecpos] = fallback;
1786 } else {
1787 return FALSE;
1788 }
1789 }
1790 invecpos++;
1791 }
1792
1793 for (i = (KDT_POSP_NRGRAPHATT - 1);
1794 i >= KDT_POSP_NRGRAPHPREFATT; i--) {
1795 if (chblen > 0) {
1796 if (chbrear == 0) {
1797 chbrear = KDT_POSP_NRGRAPHSUFFATT - 1;
1798 } else {
1799 chbrear--;
1800 }
1801 if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear],
1802 PICOBASE_UTF8_MAXLEN,
1803 &(dtposp->invec[i]), &fallback)) {
1804 if (fallback) {
1805 dtposp->invec[i] = fallback;
1806 } else {
1807 return FALSE;
1808 }
1809 }
1810 chblen--;
1811 } else {
1812 if (!kdtMapInGraph(&(dtposp->dt), i,
1813 PICOKDT_OUTSIDEGRAPH_DEFSTR,
1814 PICOKDT_OUTSIDEGRAPH_DEFLEN,
1815 &(dtposp->invec[i]), &fallback)) {
1816 if (fallback) {
1817 dtposp->invec[i] = fallback;
1818 } else {
1819 return FALSE;
1820 }
1821 }
1822 }
1823 }
1824
1825 /* set isSpecChar attribute, reuse var i */
1826 i = (specgraphflag ? 1 : 0);
1827 if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i,
1828 &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]),
1829 &fallback)) {
1830 if (fallback) {
1831 dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback;
1832 } else {
1833 return FALSE;
1834 }
1835 }
1836
1837 /* set nrGraphs attribute */
1838 if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg,
1839 &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]),
1840 &fallback)) {
1841 if (fallback) {
1842 dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback;
1843 } else {
1844 return FALSE;
1845 }
1846 }
1847 PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]",
1848 dtposp->invec[0], dtposp->invec[1], dtposp->invec[2],
1849 dtposp->invec[3], dtposp->invec[4], dtposp->invec[5],
1850 dtposp->invec[6], dtposp->invec[7], dtposp->invec[8],
1851 dtposp->invec[9], dtposp->invec[10],
1852 dtposp->invec[11], dtposp->invec[12]));
1853 dtposp->inveclen = PICOKDT_NRINPMT_POSP;
1854 return TRUE;
1855 }
1856
1857 return FALSE;
1858 }
1859
1860
picokdt_dtPosPclassify(const picokdt_DtPosP this)1861 picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) {
1862 picoos_uint32 iByteNo;
1863 picoos_int8 iBitNo;
1864 picoos_int8 rv;
1865 kdtposp_subobj_t *dtposp;
1866 kdt_subobj_t *dt;
1867
1868 dtposp = (kdtposp_subobj_t *)this;
1869 dt = &(dtposp->dt);
1870 iByteNo = 0;
1871 iBitNo = 7;
1872 while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP,
1873 &iByteNo, &iBitNo)) > 0) {
1874 PICODBG_TRACE(("asking tree"));
1875 }
1876 PICODBG_DEBUG(("done: %d", dt->dclass));
1877 return ((rv == 0) && dt->dset);
1878 }
1879
1880
picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,picokdt_classify_result_t * dtres)1881 picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,
1882 picokdt_classify_result_t *dtres) {
1883 kdtposp_subobj_t *dtposp;
1884 picoos_uint16 val;
1885
1886 dtposp = (kdtposp_subobj_t *)this;
1887
1888 if (dtposp->dt.dset &&
1889 kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) {
1890 dtres->set = TRUE;
1891 dtres->class = val;
1892 return TRUE;
1893 } else {
1894 dtres->set = FALSE;
1895 return FALSE;
1896 }
1897 }
1898
1899
1900
1901 /* ************************************************************/
1902 /* decision tree POS disambiguation (PosD) functions */
1903 /* ************************************************************/
1904
1905
picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,const picoos_uint16 * input)1906 picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,
1907 const picoos_uint16 * input) {
1908 kdtposd_subobj_t *dtposd;
1909 picoos_uint8 i;
1910 picoos_uint16 fallback = 0;
1911
1912 dtposd = (kdtposd_subobj_t *)this;
1913 dtposd->inveclen = 0;
1914
1915 PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]",
1916 input[0], input[1], input[2],
1917 input[3], input[4], input[5],
1918 input[6]));
1919 for (i = 0; i < PICOKDT_NRATT_POSD; i++) {
1920
1921 /* do the imt mapping for all inval */
1922 if (!kdtMapInFixed(&(dtposd->dt), i, input[i],
1923 &(dtposd->invec[i]), &fallback)) {
1924 if (fallback) {
1925 PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback));
1926 dtposd->invec[i] = fallback;
1927 } else {
1928 PICODBG_ERROR(("problem doing input mapping"));
1929 return FALSE;
1930 }
1931 }
1932 }
1933
1934 PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]",
1935 dtposd->invec[0], dtposd->invec[1], dtposd->invec[2],
1936 dtposd->invec[3], dtposd->invec[4], dtposd->invec[5],
1937 dtposd->invec[6]));
1938 dtposd->inveclen = PICOKDT_NRINPMT_POSD;
1939 return TRUE;
1940 }
1941
1942
picokdt_dtPosDclassify(const picokdt_DtPosD this,picoos_uint16 * treeout)1943 picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this,
1944 picoos_uint16 *treeout) {
1945 picoos_uint32 iByteNo;
1946 picoos_int8 iBitNo;
1947 picoos_int8 rv;
1948 kdtposd_subobj_t *dtposd;
1949 kdt_subobj_t *dt;
1950
1951 dtposd = (kdtposd_subobj_t *)this;
1952 dt = &(dtposd->dt);
1953 iByteNo = 0;
1954 iBitNo = 7;
1955 while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD,
1956 &iByteNo, &iBitNo)) > 0) {
1957 PICODBG_TRACE(("asking tree"));
1958 }
1959 PICODBG_DEBUG(("done: %d", dt->dclass));
1960 if ((rv == 0) && dt->dset) {
1961 *treeout = dt->dclass;
1962 return TRUE;
1963 } else {
1964 return FALSE;
1965 }
1966 }
1967
1968
1969 /* decompose the tree output and return the class in dtres
1970 dtres: POS classification result
1971 returns: TRUE if okay, FALSE otherwise
1972 */
picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,picokdt_classify_result_t * dtres)1973 picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,
1974 picokdt_classify_result_t *dtres) {
1975 kdtposd_subobj_t *dtposd;
1976 picoos_uint16 val;
1977
1978 dtposd = (kdtposd_subobj_t *)this;
1979
1980 if (dtposd->dt.dset &&
1981 kdtMapOutFixed(&(dtposd->dt), dtposd->dt.dclass, &val)) {
1982 dtres->set = TRUE;
1983 dtres->class = val;
1984 return TRUE;
1985 } else {
1986 dtres->set = FALSE;
1987 return FALSE;
1988 }
1989 }
1990
1991
1992
1993 /* ************************************************************/
1994 /* decision tree grapheme-to-phoneme (G2P) functions */
1995 /* ************************************************************/
1996
1997
1998 /* get the nr'th (starting at 0) utf char in utfgraph */
kdtGetUTF8char(const picoos_uint8 * utfgraph,const picoos_uint16 graphlen,const picoos_uint16 nr,picoos_uint8 * utf8char)1999 static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph,
2000 const picoos_uint16 graphlen,
2001 const picoos_uint16 nr,
2002 picoos_uint8 *utf8char) {
2003 picoos_uint16 i;
2004 picoos_uint32 pos;
2005
2006 pos = 0;
2007 for (i = 0; i < nr; i++) {
2008 if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) {
2009 return FALSE;
2010 }
2011 }
2012 return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char);
2013 }
2014
2015 /* determine the utfchar count (starting at 1) of the utfchar starting at pos */
kdtGetUTF8Nr(const picoos_uint8 * utfgraph,const picoos_uint16 graphlen,const picoos_uint16 pos)2016 static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph,
2017 const picoos_uint16 graphlen,
2018 const picoos_uint16 pos) {
2019 picoos_uint32 postmp;
2020 picoos_uint16 count;
2021
2022 count = 0;
2023 postmp = 0;
2024 while ((postmp <= pos) && (count < graphlen)) {
2025 if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) {
2026 PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d",
2027 count, pos, postmp));
2028 return count + 1;
2029 }
2030 count++;
2031 }
2032 return count;
2033 }
2034
2035
/* Construct the input vector of the G2P decision tree.

   The vector has PICOKDT_NRATT_G2P attributes:
     0..4   grapheme context left (-4/-3/-2/-1) and current grapheme
     5..8   grapheme context right (+1/+2/+3/+4)
     9      word POS
     10     nr of vowel-like graphs in word
     11     order of current vowel-like graph in word
     12     primary stress mark (1 if *primstressflag == 1, else 0)
     13..15 phone chunk right context +1/+2/+3

   this           : G2P decision tree object
   graph/graphlen : UTF-8 grapheme string of the word and its length
   count          : position handed to kdtGetUTF8Nr to determine the
                    1-based index of the current UTF-8 character
   pos, nrvow, ordvow, primstressflag, phonech1..3 : values for
                    attributes 9..15, see above

   Each value is mapped into the tree domain (kdtMapInGraph for 0..8,
   kdtMapInFixed for the rest). If a mapping fails, the fallback value
   is used when one is reported; otherwise the attribute is set to zero.

   returns: TRUE if every attribute was mapped (directly or through a
            fallback), FALSE if at least one attribute had to be set to
            zero. inveclen is set to PICOKDT_NRINPMT_G2P in both cases.
*/
picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this,
                                         const picoos_uint8 *graph,
                                         const picoos_uint16 graphlen,
                                         const picoos_uint8 count,
                                         const picoos_uint8 pos,
                                         const picoos_uint8 nrvow,
                                         const picoos_uint8 ordvow,
                                         picoos_uint8 *primstressflag,
                                         const picoos_uint16 phonech1,
                                         const picoos_uint16 phonech2,
                                         const picoos_uint16 phonech3) {
    kdtg2p_subobj_t *dtg2p;
    picoos_uint16 fallback = 0;
    picoos_uint8 iAttr;
    picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1];
    picoos_uint16 inval;
    picoos_int16 cinv;
    picoos_uint8 retval;
    picoos_int32 utfgraphlen;
    picoos_uint16 utfcount;

    dtg2p = (kdtg2p_subobj_t *)this;
    retval = TRUE;
    inval = 0;

    PICODBG_TRACE(("in: [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos,
                   nrvow, ordvow, *primstressflag, phonech1, phonech2,
                   phonech3));

    dtg2p->inveclen = 0;

    /* many speed-ups possible */

    /* graph attributes */
    /*   count   >     =         <=     count
       iAttr lowbound eow     upbound  delta
         0     4      4       graphlen    5
         1     3      3       graphlen    4
         2     2      2       graphlen    3
         3     1      1       graphlen    2
         4     0      -       graphlen    1

         5     0  graphlen    graphlen-1  0
         6     0  graphlen-1  graphlen-2 -1
         7     0  graphlen-2  graphlen-3 -2
         8     0  graphlen-3  graphlen-4 -3
    */

    /* a non-positive length is treated as an empty grapheme string */
    utfgraphlen = picobase_utf8_length(graph, graphlen);
    if (utfgraphlen <= 0) {
        utfgraphlen = 0;
    }
    utfcount = kdtGetUTF8Nr(graph, graphlen, count);

    /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */
    cinv = 4;
    for (iAttr = 0; iAttr < 5; iAttr++) {
        if ((utfcount > cinv) && (utfcount <= utfgraphlen)) {
            /* character lies inside the word */
            if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1,
                                utf8char)) {
                PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* outside the word: position directly before the first char
               gets the EOW marker (never for the current grapheme) */
            if ((utfcount == cinv) && (iAttr != 4)) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
            }
            utf8char[1] = '\0';
        }

        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* graph attributes right (context 1/2/3/4), MapInGraph */
    cinv = utfgraphlen;
    for (iAttr = 5; iAttr < 9; iAttr++) {
        if ((utfcount > 0) && (utfcount <= (cinv - 1))) {
            /* character lies inside the word */
            if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv,
                                utf8char)) {
                /* report the index that was actually requested */
                PICODBG_WARN(("problem getting UTF char %d",
                              utfcount+utfgraphlen-cinv));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* outside the word: position directly after the last char
               gets the EOW marker */
            if (utfcount == cinv) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
            }
            utf8char[1] = '\0';
        }

        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* other attributes, MapInFixed */
    for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) {
        switch (iAttr) {
            case 9:     /* word POS, Fix1 */
                inval = pos;
                break;
            case 10:    /* nr of vowel-like graphs in word, if vowel, Fix2  */
                inval = nrvow;
                break;
            case 11:    /* order of current vowel-like graph in word, Fix2 */
                inval = ordvow;
                break;
            case 12:    /* primary stress mark, Fix2 */
                if (*primstressflag == 1) {
                    /*already set previously*/
                    inval = 1;
                } else {
                    inval = 0;
                }
                break;
            case 13:    /* phone chunk right context +1, Hist */
                inval = phonech1;
                break;
            case 14:    /* phone chunk right context +2, Hist */
                inval = phonech2;
                break;
            case 15:    /* phone chunk right context +3, Hist */
                inval = phonech3;
                break;
            default:    /* no dedicated source value: inval is left as is */
                break;
        }

        PICODBG_TRACE(("invec %d %d", iAttr, inval));

        if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval,
                           &(dtg2p->invec[iAttr]), &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
    }

    /* 16 conversions for the 16 attributes; the stray '%' that used to
       follow the second %d formed an invalid conversion specification */
    PICODBG_TRACE(("out: [%d,%d,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|"
                   "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1],
                   dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4],
                   dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7],
                   dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10],
                   dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13],
                   dtg2p->invec[14], dtg2p->invec[15]));

    dtg2p->inveclen = PICOKDT_NRINPMT_G2P;
    return retval;
}
2221
2222
2223
2224
picokdt_dtG2Pclassify(const picokdt_DtG2P this,picoos_uint16 * treeout)2225 picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this,
2226 picoos_uint16 *treeout) {
2227 picoos_uint32 iByteNo;
2228 picoos_int8 iBitNo;
2229 picoos_int8 rv;
2230 kdtg2p_subobj_t *dtg2p;
2231 kdt_subobj_t *dt;
2232
2233 dtg2p = (kdtg2p_subobj_t *)this;
2234 dt = &(dtg2p->dt);
2235 iByteNo = 0;
2236 iBitNo = 7;
2237 while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P,
2238 &iByteNo, &iBitNo)) > 0) {
2239 PICODBG_TRACE(("asking tree"));
2240 }
2241 PICODBG_TRACE(("done: %d", dt->dclass));
2242 if ((rv == 0) && dt->dset) {
2243 *treeout = dt->dclass;
2244 return TRUE;
2245 } else {
2246 return FALSE;
2247 }
2248 }
2249
2250
2251
picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,picokdt_classify_vecresult_t * dtvres)2252 picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,
2253 picokdt_classify_vecresult_t *dtvres) {
2254 kdtg2p_subobj_t *dtg2p;
2255
2256 dtg2p = (kdtg2p_subobj_t *)this;
2257
2258 if (dtg2p->dt.dset &&
2259 kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr),
2260 dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) {
2261 return TRUE;
2262 } else {
2263 dtvres->nr = 0;
2264 return FALSE;
2265 }
2266 return TRUE;
2267 }
2268
2269
2270
2271 /* ************************************************************/
2272 /* decision tree phrasing (PHR) functions */
2273 /* ************************************************************/
2274
picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,const picoos_uint8 pre2,const picoos_uint8 pre1,const picoos_uint8 src,const picoos_uint8 fol1,const picoos_uint8 fol2,const picoos_uint16 nrwordspre,const picoos_uint16 nrwordsfol,const picoos_uint16 nrsyllsfol)2275 picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,
2276 const picoos_uint8 pre2,
2277 const picoos_uint8 pre1,
2278 const picoos_uint8 src,
2279 const picoos_uint8 fol1,
2280 const picoos_uint8 fol2,
2281 const picoos_uint16 nrwordspre,
2282 const picoos_uint16 nrwordsfol,
2283 const picoos_uint16 nrsyllsfol) {
2284 kdtphr_subobj_t *dtphr;
2285 picoos_uint8 i;
2286 picoos_uint16 inval = 0;
2287 picoos_uint16 fallback = 0;
2288
2289 dtphr = (kdtphr_subobj_t *)this;
2290 PICODBG_DEBUG(("in: [%d,%d|%d|%d,%d|%d,%d,%d]",
2291 pre2, pre1, src, fol1, fol2,
2292 nrwordspre, nrwordsfol, nrsyllsfol));
2293 dtphr->inveclen = 0;
2294
2295 for (i = 0; i < PICOKDT_NRATT_PHR; i++) {
2296 switch (i) {
2297 case 0: inval = pre2; break;
2298 case 1: inval = pre1; break;
2299 case 2: inval = src; break;
2300 case 3: inval = fol1; break;
2301 case 4: inval = fol2; break;
2302 case 5: inval = nrwordspre; break;
2303 case 6: inval = nrwordsfol; break;
2304 case 7: inval = nrsyllsfol; break;
2305 default:
2306 PICODBG_ERROR(("size mismatch"));
2307 return FALSE;
2308 break;
2309 }
2310
2311 /* do the imt mapping for all inval */
2312 if (!kdtMapInFixed(&(dtphr->dt), i, inval,
2313 &(dtphr->invec[i]), &fallback)) {
2314 if (fallback) {
2315 dtphr->invec[i] = fallback;
2316 } else {
2317 PICODBG_ERROR(("problem doing input mapping"));
2318 return FALSE;
2319 }
2320 }
2321 }
2322
2323 PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]",
2324 dtphr->invec[0], dtphr->invec[1], dtphr->invec[2],
2325 dtphr->invec[3], dtphr->invec[4], dtphr->invec[5],
2326 dtphr->invec[6], dtphr->invec[7]));
2327 dtphr->inveclen = PICOKDT_NRINPMT_PHR;
2328 return TRUE;
2329 }
2330
2331
picokdt_dtPHRclassify(const picokdt_DtPHR this)2332 picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) {
2333 picoos_uint32 iByteNo;
2334 picoos_int8 iBitNo;
2335 picoos_int8 rv;
2336 kdtphr_subobj_t *dtphr;
2337 kdt_subobj_t *dt;
2338
2339 dtphr = (kdtphr_subobj_t *)this;
2340 dt = &(dtphr->dt);
2341 iByteNo = 0;
2342 iBitNo = 7;
2343 while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR,
2344 &iByteNo, &iBitNo)) > 0) {
2345 PICODBG_TRACE(("asking tree"));
2346 }
2347 PICODBG_DEBUG(("done: %d", dt->dclass));
2348 return ((rv == 0) && dt->dset);
2349 }
2350
2351
picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,picokdt_classify_result_t * dtres)2352 picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,
2353 picokdt_classify_result_t *dtres) {
2354 kdtphr_subobj_t *dtphr;
2355 picoos_uint16 val;
2356
2357 dtphr = (kdtphr_subobj_t *)this;
2358
2359 if (dtphr->dt.dset &&
2360 kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) {
2361 dtres->set = TRUE;
2362 dtres->class = val;
2363 return TRUE;
2364 } else {
2365 dtres->set = FALSE;
2366 return FALSE;
2367 }
2368 }
2369
2370
2371
2372 /* ************************************************************/
2373 /* decision tree phono-acoustical model (PAM) functions */
2374 /* ************************************************************/
2375
picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,const picoos_uint8 * vec,const picoos_uint8 veclen)2376 picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,
2377 const picoos_uint8 *vec,
2378 const picoos_uint8 veclen) {
2379 kdtpam_subobj_t *dtpam;
2380 picoos_uint8 i;
2381 picoos_uint16 fallback = 0;
2382
2383 dtpam = (kdtpam_subobj_t *)this;
2384
2385 PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d",
2386 vec[0], vec[1], vec[2], vec[3], vec[4],
2387 vec[5], vec[6], vec[7], vec[8], vec[9]));
2388 PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d",
2389 vec[10], vec[11], vec[12], vec[13], vec[14],
2390 vec[15], vec[16], vec[17], vec[18], vec[19]));
2391 PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d",
2392 vec[20], vec[21], vec[22], vec[23], vec[24],
2393 vec[25], vec[26], vec[27], vec[28], vec[29]));
2394 PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d",
2395 vec[30], vec[31], vec[32], vec[33], vec[34],
2396 vec[35], vec[36], vec[37], vec[38], vec[39]));
2397 PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d",
2398 vec[40], vec[41], vec[42], vec[43], vec[44],
2399 vec[45], vec[46], vec[47], vec[48], vec[49]));
2400 PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d",
2401 vec[50], vec[51], vec[52], vec[53], vec[54],
2402 vec[55], vec[56], vec[57], vec[58], vec[59]));
2403
2404 dtpam->inveclen = 0;
2405
2406 /* check veclen */
2407 if (veclen != PICOKDT_NRINPMT_PAM) {
2408 PICODBG_ERROR(("wrong number of input vector elements"));
2409 return FALSE;
2410 }
2411
2412 for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
2413
2414 /* do the imt mapping for all vec eles */
2415 if (!kdtMapInFixed(&(dtpam->dt), i, vec[i],
2416 &(dtpam->invec[i]), &fallback)) {
2417 if (fallback) {
2418 dtpam->invec[i] = fallback;
2419 } else {
2420 PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i]));
2421 return FALSE;
2422 }
2423 }
2424 }
2425
2426 PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d",
2427 dtpam->invec[0], dtpam->invec[1], dtpam->invec[2],
2428 dtpam->invec[3], dtpam->invec[4], dtpam->invec[5],
2429 dtpam->invec[6], dtpam->invec[7], dtpam->invec[8],
2430 dtpam->invec[9]));
2431 PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d",
2432 dtpam->invec[10], dtpam->invec[11], dtpam->invec[12],
2433 dtpam->invec[13], dtpam->invec[14], dtpam->invec[15],
2434 dtpam->invec[16], dtpam->invec[17], dtpam->invec[18],
2435 dtpam->invec[19]));
2436 PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d",
2437 dtpam->invec[20], dtpam->invec[21], dtpam->invec[22],
2438 dtpam->invec[23], dtpam->invec[24], dtpam->invec[25],
2439 dtpam->invec[26], dtpam->invec[27], dtpam->invec[28],
2440 dtpam->invec[29]));
2441 PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d",
2442 dtpam->invec[30], dtpam->invec[31], dtpam->invec[32],
2443 dtpam->invec[33], dtpam->invec[34], dtpam->invec[35],
2444 dtpam->invec[36], dtpam->invec[37], dtpam->invec[38],
2445 dtpam->invec[39]));
2446 PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d",
2447 dtpam->invec[40], dtpam->invec[41], dtpam->invec[42],
2448 dtpam->invec[43], dtpam->invec[44], dtpam->invec[45],
2449 dtpam->invec[46], dtpam->invec[47], dtpam->invec[48],
2450 dtpam->invec[49]));
2451 PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d",
2452 dtpam->invec[50], dtpam->invec[51], dtpam->invec[52],
2453 dtpam->invec[53], dtpam->invec[54], dtpam->invec[55],
2454 dtpam->invec[56], dtpam->invec[57], dtpam->invec[58],
2455 dtpam->invec[59]));
2456
2457 dtpam->inveclen = PICOKDT_NRINPMT_PAM;
2458 return TRUE;
2459 }
2460
2461
picokdt_dtPAMclassify(const picokdt_DtPAM this)2462 picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) {
2463 picoos_uint32 iByteNo;
2464 picoos_int8 iBitNo;
2465 picoos_int8 rv;
2466 kdtpam_subobj_t *dtpam;
2467 kdt_subobj_t *dt;
2468
2469 dtpam = (kdtpam_subobj_t *)this;
2470 dt = &(dtpam->dt);
2471 iByteNo = 0;
2472 iBitNo = 7;
2473 while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM,
2474 &iByteNo, &iBitNo)) > 0) {
2475 PICODBG_TRACE(("asking tree"));
2476 }
2477 PICODBG_DEBUG(("done: %d", dt->dclass));
2478 return ((rv == 0) && dt->dset);
2479 }
2480
2481
picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,picokdt_classify_result_t * dtres)2482 picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,
2483 picokdt_classify_result_t *dtres) {
2484 kdtpam_subobj_t *dtpam;
2485 picoos_uint16 val;
2486
2487 dtpam = (kdtpam_subobj_t *)this;
2488
2489 if (dtpam->dt.dset &&
2490 kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) {
2491 dtres->set = TRUE;
2492 dtres->class = val;
2493 return TRUE;
2494 } else {
2495 dtres->set = FALSE;
2496 return FALSE;
2497 }
2498 }
2499
2500
2501
2502 /* ************************************************************/
2503 /* decision tree accentuation (ACC) functions */
2504 /* ************************************************************/
2505
picokdt_dtACCconstructInVec(const picokdt_DtACC this,const picoos_uint8 pre2,const picoos_uint8 pre1,const picoos_uint8 src,const picoos_uint8 fol1,const picoos_uint8 fol2,const picoos_uint16 hist1,const picoos_uint16 hist2,const picoos_uint16 nrwordspre,const picoos_uint16 nrsyllspre,const picoos_uint16 nrwordsfol,const picoos_uint16 nrsyllsfol,const picoos_uint16 footwordsfol,const picoos_uint16 footsyllsfol)2506 picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this,
2507 const picoos_uint8 pre2,
2508 const picoos_uint8 pre1,
2509 const picoos_uint8 src,
2510 const picoos_uint8 fol1,
2511 const picoos_uint8 fol2,
2512 const picoos_uint16 hist1,
2513 const picoos_uint16 hist2,
2514 const picoos_uint16 nrwordspre,
2515 const picoos_uint16 nrsyllspre,
2516 const picoos_uint16 nrwordsfol,
2517 const picoos_uint16 nrsyllsfol,
2518 const picoos_uint16 footwordsfol,
2519 const picoos_uint16 footsyllsfol) {
2520 kdtacc_subobj_t *dtacc;
2521 picoos_uint8 i;
2522 picoos_uint16 inval = 0;
2523 picoos_uint16 fallback = 0;
2524
2525 dtacc = (kdtacc_subobj_t *)this;
2526 PICODBG_DEBUG(("in: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2527 pre2, pre1, src, fol1, fol2, hist1, hist2,
2528 nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol,
2529 footwordsfol, footsyllsfol));
2530 dtacc->inveclen = 0;
2531
2532 for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
2533 switch (i) {
2534 case 0: inval = pre2; break;
2535 case 1: inval = pre1; break;
2536 case 2: inval = src; break;
2537 case 3: inval = fol1; break;
2538 case 4: inval = fol2; break;
2539 case 5: inval = hist1; break;
2540 case 6: inval = hist2; break;
2541 case 7: inval = nrwordspre; break;
2542 case 8: inval = nrsyllspre; break;
2543 case 9: inval = nrwordsfol; break;
2544 case 10: inval = nrsyllsfol; break;
2545 case 11: inval = footwordsfol; break;
2546 case 12: inval = footsyllsfol; break;
2547 default:
2548 PICODBG_ERROR(("size mismatch"));
2549 return FALSE;
2550 break;
2551 }
2552
2553 if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) {
2554 /* in input to this function the HISTORY_ZERO is used to
2555 mark the no-value-available case. For sparsity reasons
2556 this was not used in the training. For
2557 no-value-available cases, instead, do reverse out
2558 mapping of ACC0 to get tree domain for ACC0 */
2559 if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0,
2560 &inval, &fallback)) {
2561 if (fallback) {
2562 inval = fallback;
2563 } else {
2564 PICODBG_ERROR(("problem doing reverse output mapping"));
2565 return FALSE;
2566 }
2567 }
2568 }
2569
2570 /* do the imt mapping for all inval */
2571 if (!kdtMapInFixed(&(dtacc->dt), i, inval,
2572 &(dtacc->invec[i]), &fallback)) {
2573 if (fallback) {
2574 dtacc->invec[i] = fallback;
2575 } else {
2576 PICODBG_ERROR(("problem doing input mapping"));
2577 return FALSE;
2578 }
2579 }
2580 }
2581
2582 PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
2583 dtacc->invec[0], dtacc->invec[1], dtacc->invec[2],
2584 dtacc->invec[3], dtacc->invec[4], dtacc->invec[5],
2585 dtacc->invec[6], dtacc->invec[7], dtacc->invec[8],
2586 dtacc->invec[9], dtacc->invec[10], dtacc->invec[11],
2587 dtacc->invec[12]));
2588 dtacc->inveclen = PICOKDT_NRINPMT_ACC;
2589 return TRUE;
2590 }
2591
2592
picokdt_dtACCclassify(const picokdt_DtACC this,picoos_uint16 * treeout)2593 picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this,
2594 picoos_uint16 *treeout) {
2595 picoos_uint32 iByteNo;
2596 picoos_int8 iBitNo;
2597 picoos_int8 rv;
2598 kdtacc_subobj_t *dtacc;
2599 kdt_subobj_t *dt;
2600
2601 dtacc = (kdtacc_subobj_t *)this;
2602 dt = &(dtacc->dt);
2603 iByteNo = 0;
2604 iBitNo = 7;
2605 while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC,
2606 &iByteNo, &iBitNo)) > 0) {
2607 PICODBG_TRACE(("asking tree"));
2608 }
2609 PICODBG_TRACE(("done: %d", dt->dclass));
2610 if ((rv == 0) && dt->dset) {
2611 *treeout = dt->dclass;
2612 return TRUE;
2613 } else {
2614 return FALSE;
2615 }
2616 }
2617
2618
picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,picokdt_classify_result_t * dtres)2619 picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,
2620 picokdt_classify_result_t *dtres) {
2621 kdtacc_subobj_t *dtacc;
2622 picoos_uint16 val;
2623
2624 dtacc = (kdtacc_subobj_t *)this;
2625
2626 if (dtacc->dt.dset &&
2627 kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) {
2628 dtres->set = TRUE;
2629 dtres->class = val;
2630 return TRUE;
2631 } else {
2632 dtres->set = FALSE;
2633 return FALSE;
2634 }
2635 }
2636
2637 #ifdef __cplusplus
2638 }
2639 #endif
2640
2641
2642 /* end */
2643