1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /* ---- includes ----------------------------------------------------------- */
18
19 #include "b_TensorEm/CompactMat.h"
20 #include "b_TensorEm/Functions.h"
21 #include "b_BasicEm/Math.h"
22 #include "b_BasicEm/Functions.h"
23 #include "b_BasicEm/Memory.h"
24
25 /* ------------------------------------------------------------------------- */
26
27 /* ========================================================================= */
28 /* */
29 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */
30 /* */
31 /* ========================================================================= */
32
33 /* ------------------------------------------------------------------------- */
34
35 /** Returns dot product of inVec with indexed row
36 The result is a floating point expresstion:
37 upper 16 bit: signed value
38 lower 16 bit: signed exponent
39 */
bts_CompactMat_fltDotPrdRow(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const int16 * inVecA,uint32 inNormBitsA,uint32 rowA)40 int32 bts_CompactMat_fltDotPrdRow( struct bbs_Context* cpA,
41 struct bts_CompactMat* ptrA,
42 const int16* inVecA,
43 uint32 inNormBitsA,
44 uint32 rowA )
45 {
46 const int16* rowPtrL = ptrA->cpsArrE.arrPtrE + ptrA->wordsPerRowE * rowA;
47
48 /* extract row-header info */
49 uint32 offsL = *rowPtrL++;
50 uint32 sizeL = *rowPtrL++;
51 int32 factorManL = *rowPtrL++;
52 int32 factorExpL = *rowPtrL++;
53 uint32 rowNormBitsL = *rowPtrL++;
54
55 /* consider possible overflow */
56 uint16 overflowBitsL = ( inNormBitsA + rowNormBitsL >= 31 ) ? inNormBitsA + rowNormBitsL - 31 : 0;
57
58 const int16* inPtrL = inVecA + offsL;
59
60 count_t iL;
61 int32 sumL = 0;
62
63 if( overflowBitsL == 0 ) /* raw dot product fits in int32 */
64 {
65 switch( ptrA->bitsPerValueE )
66 {
67 case 16:
68 {
69 for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ );
70 }
71 break;
72
73 #ifndef HW_TMS320C5x /* platforms that don't have int8 must use the 'default' implementation */
74
75 case 8:
76 {
77 const uint16* dpL = ( uint16* )rowPtrL;
78 for( iL = sizeL; iL >= 8; iL -= 8 )
79 {
80 sumL += ( ( int8 ) dpL[ 0 ] * ( int32 )inPtrL[ 0 ] );
81 sumL += ( ( int8 )( dpL[ 0 ] >> 8 ) * ( int32 )inPtrL[ 1 ] );
82 sumL += ( ( int8 ) dpL[ 1 ] * ( int32 )inPtrL[ 2 ] );
83 sumL += ( ( int8 )( dpL[ 1 ] >> 8 ) * ( int32 )inPtrL[ 3 ] );
84 sumL += ( ( int8 ) dpL[ 2 ] * ( int32 )inPtrL[ 4 ] );
85 sumL += ( ( int8 )( dpL[ 2 ] >> 8 ) * ( int32 )inPtrL[ 5 ] );
86 sumL += ( ( int8 ) dpL[ 3 ] * ( int32 )inPtrL[ 6 ] );
87 sumL += ( ( int8 )( dpL[ 3 ] >> 8 ) * ( int32 )inPtrL[ 7 ] );
88 dpL += 4;
89 inPtrL += 8;
90 }
91 for( ; iL >= 2; iL -= 2 )
92 {
93 sumL += ( ( int8 ) *dpL * ( int32 )inPtrL[ 0 ] );
94 sumL += ( ( int8 )( *dpL >> 8 ) * ( int32 )inPtrL[ 1 ] );
95 dpL++;
96 inPtrL += 2;
97 }
98 if( iL > 0 )
99 {
100 sumL += ( ( int8 )*dpL++ * ( int32 )inPtrL[ 0 ] );
101 }
102 }
103 break;
104
105 case 6:
106 {
107 const uint16* dpL = ( uint16* )rowPtrL;
108 for( iL = sizeL; iL >= 8; iL -= 8 )
109 {
110 int32 lSumL = 0;
111 lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] );
112 lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
113 lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
114 lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
115 lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
116 lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
117 lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
118 lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 8 ) & 0x00FC ) * ( int32 )inPtrL[ 7 ] );
119 sumL += ( lSumL >> 2 );
120 dpL += 3;
121 inPtrL += 8;
122 }
123
124 {
125 int32 lSumL = 0;
126 if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] );
127 if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] );
128 if( iL > 2 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] );
129 if( iL > 3 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] );
130 if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] );
131 if( iL > 5 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] );
132 if( iL > 6 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] );
133 sumL += ( lSumL >> 2 );
134 }
135 }
136 break;
137
138 case 5:
139 {
140 const uint16* dpL = ( uint16* )rowPtrL;
141 for( iL = sizeL; iL >= 16; iL -= 16 )
142 {
143 int32 lSumL = 0;
144 lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] );
145 lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] );
146 lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] );
147 lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] );
148 lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] );
149 lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] );
150 lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] );
151 lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] );
152 lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] );
153 lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] );
154 lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
155 lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
156 lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
157 lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
158 lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
159 lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 8 ) & 0x00F8 ) * ( int32 )inPtrL[ 15 ] );
160 sumL += ( lSumL >> 3 );
161 dpL += 5;
162 inPtrL += 16;
163 }
164
165 {
166 int32 lSumL = 0;
167 if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] );
168 if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] );
169 if( iL > 2 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] );
170 if( iL > 3 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] );
171 if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] );
172 if( iL > 5 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] );
173 if( iL > 6 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] );
174 if( iL > 7 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] );
175 if( iL > 8 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] );
176 if( iL > 9 ) lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] );
177 if( iL > 10 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] );
178 if( iL > 11 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] );
179 if( iL > 12 ) lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] );
180 if( iL > 13 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] );
181 if( iL > 14 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] );
182 sumL += ( lSumL >> 3 );
183 }
184 }
185 break;
186
187 case 4:
188 {
189 for( iL = sizeL; iL >= 4; iL -= 4 )
190 {
191 uint16 v1L = *rowPtrL++;
192 int32 lSumL = 0;
193 lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
194 lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
195 lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
196 lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
197 inPtrL += 4;
198 sumL += ( lSumL >> 4 );
199 }
200 {
201 uint16 v1L = *rowPtrL++;
202 int32 lSumL = 0;
203 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
204 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
205 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
206 sumL += ( lSumL >> 4 );
207 }
208 }
209 break;
210
211 #endif /*ifndef HW_TMS320C5x*/
212
213 /* The default case can process all bit sizes including those that are explicitly encoded above
214 * Use the default for all bit sizes when the platform cannot handle the int8 data type (e.g. HW_TMS320C5x)
215 */
216 default:
217 {
218 uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
219 uint32 bitsL = ptrA->bitsPerValueE;
220 uint16 adjL = 16 - bitsL;
221 uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
222 uint32 srL = bitsL;
223 for( iL = 0; iL < sizeL; iL++ )
224 {
225 if( srL > 16 )
226 {
227 bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
228 srL -= 16;
229 }
230 sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] ) >> adjL;
231 srL += bitsL;
232 }
233 }
234 }
235 }
236 else /* raw dot product does not fit in int32 */
237 {
238 int32 roundL = 1 << ( overflowBitsL - 1 );
239 switch( ptrA->bitsPerValueE )
240 {
241 case 16:
242 {
243 for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ) + roundL ) >> overflowBitsL;
244 }
245 break;
246
247 case 8:
248 {
249 for( iL = sizeL; iL >= 2; iL -= 2 )
250 {
251 uint16 v1L = *rowPtrL++;
252 int32 lSumL = ( ( int8 ) v1L * ( int32 )inPtrL[ 0 ] )
253 + ( ( int8 )( v1L >> 8 ) * ( int32 )inPtrL[ 1 ] );
254 sumL += ( lSumL + roundL ) >> overflowBitsL;
255 inPtrL += 2;
256 }
257 if( iL > 0 )
258 {
259 sumL += ( ( ( int8 )*rowPtrL++ * ( int32 )inPtrL[ 0 ] ) + roundL ) >> overflowBitsL;
260 }
261 }
262 break;
263
264 case 4:
265 {
266 for( iL = sizeL; iL >= 4; iL -= 4 )
267 {
268 uint16 v1L = *rowPtrL++;
269 int32 lSumL = 0;
270 lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
271 lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
272 lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
273 lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] );
274 inPtrL += 4;
275 sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
276 }
277 {
278 uint16 v1L = *rowPtrL++;
279 int32 lSumL = 0;
280 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] );
281 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] );
282 if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] );
283 sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL;
284 }
285 }
286 break;
287
288 default:
289 {
290 uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16;
291 uint32 bitsL = ptrA->bitsPerValueE;
292 uint16 adjL = 16 - bitsL;
293 uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL;
294 uint32 srL = bitsL;
295 int32 lRoundL = roundL << adjL;
296 int32 lAdjL = overflowBitsL + adjL;
297 for( iL = 0; iL < sizeL; iL++ )
298 {
299 if( srL > 16 )
300 {
301 bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 );
302 srL -= 16;
303 }
304 sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] + lRoundL ) >> lAdjL;
305 srL += bitsL;
306 }
307 }
308 }
309 }
310
311 /* compute result */
312 {
313 int32 resultManL;
314 int32 resultExpL;
315 int32 resultLogL;
316 bbs_mulS32( sumL, factorManL, &resultManL, &resultExpL );
317 resultExpL += factorExpL + overflowBitsL;
318 resultLogL = bbs_intLog2( resultManL > 0 ? resultManL : -resultManL );
319 if( resultLogL < 30 )
320 {
321 resultManL <<= 30 - resultLogL;
322 resultExpL -= 30 - resultLogL;
323 }
324
325 resultManL = ( ( resultManL >> 15 ) + 1 ) >> 1;
326 resultExpL = resultExpL + 16;
327
328 return ( ( resultManL & 0x0000FFFF ) << 16 ) | ( resultExpL & 0x0000FFFF );
329 }
330 }
331
332 /* ------------------------------------------------------------------------- */
333
334 /* ========================================================================= */
335 /* */
336 /* ---- \ghd{ constructor / destructor } ----------------------------------- */
337 /* */
338 /* ========================================================================= */
339
340 /* ------------------------------------------------------------------------- */
341
bts_CompactMat_init(struct bbs_Context * cpA,struct bts_CompactMat * ptrA)342 void bts_CompactMat_init( struct bbs_Context* cpA,
343 struct bts_CompactMat* ptrA )
344 {
345 ptrA->widthE = 0;
346 ptrA->heightE = 0;
347 ptrA->bitsPerValueE = 0;
348 ptrA->wordsPerRowE = 0;
349 ptrA->maxRowBitsE = 0;
350 bbs_Int16Arr_init( cpA, &ptrA->cpsArrE );
351 bbs_Int16Arr_init( cpA, &ptrA->expArrE );
352
353 }
354
355 /* ------------------------------------------------------------------------- */
356
bts_CompactMat_exit(struct bbs_Context * cpA,struct bts_CompactMat * ptrA)357 void bts_CompactMat_exit( struct bbs_Context* cpA,
358 struct bts_CompactMat* ptrA )
359 {
360 ptrA->widthE = 0;
361 ptrA->heightE = 0;
362 ptrA->bitsPerValueE = 0;
363 ptrA->wordsPerRowE = 0;
364 ptrA->maxRowBitsE = 0;
365 bbs_Int16Arr_exit( cpA, &ptrA->cpsArrE );
366 bbs_Int16Arr_exit( cpA, &ptrA->expArrE );
367 }
368 /* ------------------------------------------------------------------------- */
369
370 /* ========================================================================= */
371 /* */
372 /* ---- \ghd{ operators } -------------------------------------------------- */
373 /* */
374 /* ========================================================================= */
375
376 /* ------------------------------------------------------------------------- */
377
378 /* ========================================================================= */
379 /* */
380 /* ---- \ghd{ query functions } -------------------------------------------- */
381 /* */
382 /* ========================================================================= */
383
384 /* ------------------------------------------------------------------------- */
385
386 /* ========================================================================= */
387 /* */
388 /* ---- \ghd{ modify functions } ------------------------------------------- */
389 /* */
390 /* ========================================================================= */
391
392 /* ------------------------------------------------------------------------- */
393
/** Allocates and zero-fills the storage of a compact matrix.
 *
 *  Does nothing if the context already carries an error. Reports an error
 *  and returns without touching ptrA if bitsA is outside 2..16.
 *
 *  @param widthA      stored as widthE
 *  @param heightA     stored as heightE (number of rows)
 *  @param bitsA       bits per packed value, 2..16
 *  @param maxRowSizeA maximum number of values in one row; determines wordsPerRowE
 *  @param mspA        memory segment passed on to the array allocations
 */
void bts_CompactMat_create( struct bbs_Context* cpA,
                            struct bts_CompactMat* ptrA,
                            uint32 widthA,
                            uint32 heightA,
                            uint32 bitsA,
                            uint32 maxRowSizeA,
                            struct bbs_MemSeg* mspA )
{
    if( bbs_Context_error( cpA ) ) return;
    if( bitsA < 2 || bitsA > 16 )
    {
        bbs_ERROR0( "bts_CompactMat_create:\nbitsA must be between 2 and 16" );
        return;
    }

    ptrA->widthE = widthA;
    ptrA->heightE = heightA;
    ptrA->bitsPerValueE = bitsA;
    ptrA->wordsPerRowE = 6 /*header + 1*/ + ( ( maxRowSizeA * bitsA ) / ( 8 * sizeof( short ) ) );
    ptrA->maxRowBitsE = 0;

    /* keep the row length an even number of 16 bit words */
    if( ( ptrA->wordsPerRowE & 1 ) != 0 ) ptrA->wordsPerRowE++;

    /* packed row data */
    bbs_Int16Arr_create( cpA, &ptrA->cpsArrE, heightA * ptrA->wordsPerRowE, mspA );
    bbs_Int16Arr_fill( cpA, &ptrA->cpsArrE, 0 );

    /* one exponent entry per row */
    bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
    bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
}
420
421 /* ------------------------------------------------------------------------- */
422
bts_CompactMat_copy(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const struct bts_CompactMat * srcPtrA)423 void bts_CompactMat_copy( struct bbs_Context* cpA,
424 struct bts_CompactMat* ptrA,
425 const struct bts_CompactMat* srcPtrA )
426 {
427 ptrA->widthE = srcPtrA->widthE;
428 ptrA->heightE = srcPtrA->heightE;
429 ptrA->bitsPerValueE = srcPtrA->bitsPerValueE;
430 ptrA->wordsPerRowE = srcPtrA->wordsPerRowE;
431 ptrA->maxRowBitsE = srcPtrA->maxRowBitsE;
432 bbs_Int16Arr_copy( cpA, &ptrA->cpsArrE, &srcPtrA->cpsArrE );
433 bbs_Int16Arr_size( cpA, &ptrA->expArrE, ptrA->heightE );
434 }
435
436 /* ------------------------------------------------------------------------- */
437
438 /* ========================================================================= */
439 /* */
440 /* ---- \ghd{ I/O } -------------------------------------------------------- */
441 /* */
442 /* ========================================================================= */
443
444 /* ------------------------------------------------------------------------- */
445
bts_CompactMat_memSize(struct bbs_Context * cpA,const struct bts_CompactMat * ptrA)446 uint32 bts_CompactMat_memSize( struct bbs_Context* cpA,
447 const struct bts_CompactMat *ptrA )
448 {
449 return bbs_SIZEOF16( uint32 )
450 + bbs_SIZEOF16( uint32 ) /* version */
451 + bbs_SIZEOF16( ptrA->widthE )
452 + bbs_SIZEOF16( ptrA->heightE )
453 + bbs_SIZEOF16( ptrA->bitsPerValueE )
454 + bbs_SIZEOF16( ptrA->wordsPerRowE )
455 + bbs_SIZEOF16( ptrA->maxRowBitsE )
456 + bbs_Int16Arr_memSize( cpA, &ptrA->cpsArrE );
457 }
458
459 /* ------------------------------------------------------------------------- */
460
bts_CompactMat_memWrite(struct bbs_Context * cpA,const struct bts_CompactMat * ptrA,uint16 * memPtrA)461 uint32 bts_CompactMat_memWrite( struct bbs_Context* cpA,
462 const struct bts_CompactMat* ptrA,
463 uint16* memPtrA )
464 {
465 uint32 memSizeL = bts_CompactMat_memSize( cpA, ptrA );
466 memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
467 memPtrA += bbs_memWriteUInt32( bts_COMPACT_MAT_VERSION, memPtrA );
468 memPtrA += bbs_memWrite32( &ptrA->widthE, memPtrA );
469 memPtrA += bbs_memWrite32( &ptrA->heightE, memPtrA );
470 memPtrA += bbs_memWrite32( &ptrA->bitsPerValueE, memPtrA );
471 memPtrA += bbs_memWrite32( &ptrA->wordsPerRowE, memPtrA );
472 memPtrA += bbs_memWrite32( &ptrA->maxRowBitsE, memPtrA );
473 memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->cpsArrE, memPtrA );
474 return memSizeL;
475 }
476
477 /* ------------------------------------------------------------------------- */
478
bts_CompactMat_memRead(struct bbs_Context * cpA,struct bts_CompactMat * ptrA,const uint16 * memPtrA,struct bbs_MemSeg * mspA)479 uint32 bts_CompactMat_memRead( struct bbs_Context* cpA,
480 struct bts_CompactMat* ptrA,
481 const uint16* memPtrA,
482 struct bbs_MemSeg* mspA )
483 {
484 uint32 memSizeL, versionL;
485 if( bbs_Context_error( cpA ) ) return 0;
486 memPtrA += bbs_memRead32( &memSizeL, memPtrA );
487 memPtrA += bbs_memReadVersion32( cpA, &versionL, bts_COMPACT_MAT_VERSION, memPtrA );
488 memPtrA += bbs_memRead32( &ptrA->widthE, memPtrA );
489 memPtrA += bbs_memRead32( &ptrA->heightE, memPtrA );
490 memPtrA += bbs_memRead32( &ptrA->bitsPerValueE, memPtrA );
491 memPtrA += bbs_memRead32( &ptrA->wordsPerRowE, memPtrA );
492 memPtrA += bbs_memRead32( &ptrA->maxRowBitsE, memPtrA );
493 memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->cpsArrE, memPtrA, mspA );
494
495 if( memSizeL != bts_CompactMat_memSize( cpA, ptrA ) )
496 {
497 bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_CompactMat_memRead( const struct bts_CompactMat* ptrA, const void* memPtrA ):\n"
498 "size mismatch" );
499 }
500
501 bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA );
502 bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 );
503
504 return memSizeL;
505 }
506
507 /* ------------------------------------------------------------------------- */
508
509 /* ========================================================================= */
510 /* */
511 /* ---- \ghd{ exec functions } --------------------------------------------- */
512 /* */
513 /* ========================================================================= */
514
515 /* ------------------------------------------------------------------------- */
516
/** Maps inVecA through the matrix: outVecA[ i ] receives the mantissa of
 *  the dot product of inVecA with row i, all mantissas being aligned to a
 *  common exponent.
 *
 *  @param inVecA     input vector (widthE values are used for the norm estimate)
 *  @param outVecA    output vector, heightE values are written
 *  @param outExpPtrA if not NULL, receives the common (maximum) exponent
 *
 *  Although ptrA is const, the member array expArrE is written to: it
 *  holds the per-row exponents between the two passes.
 */
void bts_CompactMat_map( struct bbs_Context* cpA,
                         const struct bts_CompactMat* ptrA,
                         const int16* inVecA,
                         int16* outVecA,
                         int16* outExpPtrA )
{
    uint32 inNormBitsL = bbs_intLog2( bbs_vecNorm16( inVecA, ptrA->widthE ) ) + 1;
    uint32 iL;

    int16* expArrL = ( ( struct bts_CompactMat* )ptrA )->expArrE.arrPtrE;
    int16 maxExpL = -32767;

    /* pass 1: per-row mantissa and exponent, track the largest exponent */
    for( iL = 0; iL < ptrA->heightE; iL++ )
    {
        int32 fltL = bts_CompactMat_fltDotPrdRow( cpA, ( struct bts_CompactMat* )ptrA, inVecA, inNormBitsL, iL );
        outVecA[ iL ] = fltL >> 16;
        expArrL[ iL ] = fltL & 0x0000FFFF;

        maxExpL = ( expArrL[ iL ] > maxExpL ) ? expArrL[ iL ] : maxExpL;
    }

    if( outExpPtrA != NULL ) *outExpPtrA = maxExpL;

    /* pass 2: shift every mantissa (with rounding) to the common exponent */
    for( iL = 0; iL < ptrA->heightE; iL++ )
    {
        int32 shrL = maxExpL - expArrL[ iL ];
        if( shrL >= 16 )
        {
            /* a 16 bit mantissa shifted by 16 or more always rounds to 0;
             * handled explicitly because the exponent gap may exceed the
             * width of int, for which the shift operator is undefined */
            outVecA[ iL ] = 0;
        }
        else if( shrL > 0 )
        {
            outVecA[ iL ] = ( ( outVecA[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
        }
    }
}
549
550 /* ------------------------------------------------------------------------- */
551
552 /* ========================================================================= */
553
554