1 /*
2 * Copyright (c) 2017 Gerion Entrup
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 /**
22 * @file
23 * MPEG-7 video signature calculation and lookup filter
24 */
25
26 #include "signature.h"
27
28 #define HOUGH_MAX_OFFSET 90
29 #define MAX_FRAMERATE 60
30
31 #define DIR_PREV 0
32 #define DIR_NEXT 1
33 #define DIR_PREV_END 2
34 #define DIR_NEXT_END 3
35
36 #define STATUS_NULL 0
37 #define STATUS_END_REACHED 1
38 #define STATUS_BEGIN_REACHED 2
39
fill_l1distlut(uint8_t lut[])40 static void fill_l1distlut(uint8_t lut[])
41 {
42 int i, j, tmp_i, tmp_j,count;
43 uint8_t dist;
44
45 for (i = 0, count = 0; i < 242; i++) {
46 for (j = i + 1; j < 243; j++, count++) {
47 /* ternary distance between i and j */
48 dist = 0;
49 tmp_i = i; tmp_j = j;
50 do {
51 dist += FFABS((tmp_j % 3) - (tmp_i % 3));
52 tmp_j /= 3;
53 tmp_i /= 3;
54 } while (tmp_i > 0 || tmp_j > 0);
55 lut[count] = dist;
56 }
57 }
58 }
59
intersection_word(const uint8_t * first,const uint8_t * second)60 static unsigned int intersection_word(const uint8_t *first, const uint8_t *second)
61 {
62 unsigned int val=0,i;
63 for (i = 0; i < 28; i += 4) {
64 val += av_popcount( (first[i] & second[i] ) << 24 |
65 (first[i+1] & second[i+1]) << 16 |
66 (first[i+2] & second[i+2]) << 8 |
67 (first[i+3] & second[i+3]) );
68 }
69 val += av_popcount( (first[28] & second[28]) << 16 |
70 (first[29] & second[29]) << 8 |
71 (first[30] & second[30]) );
72 return val;
73 }
74
union_word(const uint8_t * first,const uint8_t * second)75 static unsigned int union_word(const uint8_t *first, const uint8_t *second)
76 {
77 unsigned int val=0,i;
78 for (i = 0; i < 28; i += 4) {
79 val += av_popcount( (first[i] | second[i] ) << 24 |
80 (first[i+1] | second[i+1]) << 16 |
81 (first[i+2] | second[i+2]) << 8 |
82 (first[i+3] | second[i+3]) );
83 }
84 val += av_popcount( (first[28] | second[28]) << 16 |
85 (first[29] | second[29]) << 8 |
86 (first[30] | second[30]) );
87 return val;
88 }
89
get_l1dist(AVFilterContext * ctx,SignatureContext * sc,const uint8_t * first,const uint8_t * second)90 static unsigned int get_l1dist(AVFilterContext *ctx, SignatureContext *sc, const uint8_t *first, const uint8_t *second)
91 {
92 unsigned int i;
93 unsigned int dist = 0;
94 uint8_t f, s;
95
96 for (i = 0; i < SIGELEM_SIZE/5; i++) {
97 if (first[i] != second[i]) {
98 f = first[i];
99 s = second[i];
100 if (f > s) {
101 /* little variation of gauss sum formula */
102 dist += sc->l1distlut[243*242/2 - (243-s)*(242-s)/2 + f - s - 1];
103 } else {
104 dist += sc->l1distlut[243*242/2 - (243-f)*(242-f)/2 + s - f - 1];
105 }
106 }
107 }
108 return dist;
109 }
110
111 /**
112 * calculates the jaccard distance and evaluates a pair of coarse signatures as good
113 * @return 0 if pair is bad, 1 otherwise
114 */
get_jaccarddist(SignatureContext * sc,CoarseSignature * first,CoarseSignature * second)115 static int get_jaccarddist(SignatureContext *sc, CoarseSignature *first, CoarseSignature *second)
116 {
117 int jaccarddist, i, composdist = 0, cwthcount = 0;
118 for (i = 0; i < 5; i++) {
119 if ((jaccarddist = intersection_word(first->data[i], second->data[i])) > 0) {
120 jaccarddist /= union_word(first->data[i], second->data[i]);
121 }
122 if (jaccarddist >= sc->thworddist) {
123 if (++cwthcount > 2) {
124 /* more than half (5/2) of distances are too wide */
125 return 0;
126 }
127 }
128 composdist += jaccarddist;
129 if (composdist > sc->thcomposdist) {
130 return 0;
131 }
132 }
133 return 1;
134 }
135
136 /**
137 * step through the coarsesignatures as long as a good candidate is found
138 * @return 0 if no candidate is found, 1 otherwise
139 */
find_next_coarsecandidate(SignatureContext * sc,CoarseSignature * secondstart,CoarseSignature ** first,CoarseSignature ** second,int start)140 static int find_next_coarsecandidate(SignatureContext *sc, CoarseSignature *secondstart, CoarseSignature **first, CoarseSignature **second, int start)
141 {
142 /* go one coarsesignature foreword */
143 if (!start) {
144 if ((*second)->next) {
145 *second = (*second)->next;
146 } else if ((*first)->next) {
147 *second = secondstart;
148 *first = (*first)->next;
149 } else {
150 return 0;
151 }
152 }
153
154 while (1) {
155 if (get_jaccarddist(sc, *first, *second))
156 return 1;
157
158 /* next signature */
159 if ((*second)->next) {
160 *second = (*second)->next;
161 } else if ((*first)->next) {
162 *second = secondstart;
163 *first = (*first)->next;
164 } else {
165 return 0;
166 }
167 }
168 }
169
170 /**
171 * compares framesignatures and sorts out signatures with a l1 distance above a given threshold.
172 * Then tries to find out offset and differences between framerates with a hough transformation
173 */
get_matching_parameters(AVFilterContext * ctx,SignatureContext * sc,FineSignature * first,FineSignature * second)174 static MatchingInfo* get_matching_parameters(AVFilterContext *ctx, SignatureContext *sc, FineSignature *first, FineSignature *second)
175 {
176 FineSignature *f, *s;
177 size_t i, j, k, l, hmax = 0, score;
178 int framerate, offset, l1dist;
179 double m;
180 MatchingInfo *cands = NULL, *c = NULL;
181
182 struct {
183 uint8_t size;
184 unsigned int dist;
185 FineSignature *a;
186 uint8_t b_pos[COARSE_SIZE];
187 FineSignature *b[COARSE_SIZE];
188 } pairs[COARSE_SIZE];
189
190 typedef struct hspace_elem {
191 int dist;
192 size_t score;
193 FineSignature *a;
194 FineSignature *b;
195 } hspace_elem;
196
197 /* houghspace */
198 hspace_elem** hspace = av_malloc_array(MAX_FRAMERATE, sizeof(hspace_elem *));
199
200 /* initialize houghspace */
201 for (i = 0; i < MAX_FRAMERATE; i++) {
202 hspace[i] = av_malloc_array(2 * HOUGH_MAX_OFFSET + 1, sizeof(hspace_elem));
203 for (j = 0; j < HOUGH_MAX_OFFSET; j++) {
204 hspace[i][j].score = 0;
205 hspace[i][j].dist = 99999;
206 }
207 }
208
209 /* l1 distances */
210 for (i = 0, f = first; i < COARSE_SIZE && f->next; i++, f = f->next) {
211 pairs[i].size = 0;
212 pairs[i].dist = 99999;
213 pairs[i].a = f;
214 for (j = 0, s = second; j < COARSE_SIZE && s->next; j++, s = s->next) {
215 /* l1 distance of finesignature */
216 l1dist = get_l1dist(ctx, sc, f->framesig, s->framesig);
217 if (l1dist < sc->thl1) {
218 if (l1dist < pairs[i].dist) {
219 pairs[i].size = 1;
220 pairs[i].dist = l1dist;
221 pairs[i].b_pos[0] = j;
222 pairs[i].b[0] = s;
223 } else if (l1dist == pairs[i].dist) {
224 pairs[i].b[pairs[i].size] = s;
225 pairs[i].b_pos[pairs[i].size] = j;
226 pairs[i].size++;
227 }
228 }
229 }
230 }
231 /* last incomplete coarsesignature */
232 if (f->next == NULL) {
233 for (; i < COARSE_SIZE; i++) {
234 pairs[i].size = 0;
235 pairs[i].dist = 99999;
236 }
237 }
238
239 /* hough transformation */
240 for (i = 0; i < COARSE_SIZE; i++) {
241 for (j = 0; j < pairs[i].size; j++) {
242 for (k = i + 1; k < COARSE_SIZE; k++) {
243 for (l = 0; l < pairs[k].size; l++) {
244 if (pairs[i].b[j] != pairs[k].b[l]) {
245 /* linear regression */
246 m = (pairs[k].b_pos[l]-pairs[i].b_pos[j]) / (k-i); /* good value between 0.0 - 2.0 */
247 framerate = (int) m*30 + 0.5; /* round up to 0 - 60 */
248 if (framerate>0 && framerate <= MAX_FRAMERATE) {
249 offset = pairs[i].b_pos[j] - ((int) m*i + 0.5); /* only second part has to be rounded up */
250 if (offset > -HOUGH_MAX_OFFSET && offset < HOUGH_MAX_OFFSET) {
251 if (pairs[i].dist < pairs[k].dist) {
252 if (pairs[i].dist < hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist) {
253 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist = pairs[i].dist;
254 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].a = pairs[i].a;
255 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].b = pairs[i].b[j];
256 }
257 } else {
258 if (pairs[k].dist < hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist) {
259 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].dist = pairs[k].dist;
260 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].a = pairs[k].a;
261 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].b = pairs[k].b[l];
262 }
263 }
264
265 score = hspace[framerate-1][offset+HOUGH_MAX_OFFSET].score + 1;
266 if (score > hmax )
267 hmax = score;
268 hspace[framerate-1][offset+HOUGH_MAX_OFFSET].score = score;
269 }
270 }
271 }
272 }
273 }
274 }
275 }
276
277 if (hmax > 0) {
278 hmax = (int) (0.7*hmax);
279 for (i = 0; i < MAX_FRAMERATE; i++) {
280 for (j = 0; j < HOUGH_MAX_OFFSET; j++) {
281 if (hmax < hspace[i][j].score) {
282 if (c == NULL) {
283 c = av_malloc(sizeof(MatchingInfo));
284 if (!c)
285 av_log(ctx, AV_LOG_FATAL, "Could not allocate memory");
286 cands = c;
287 } else {
288 c->next = av_malloc(sizeof(MatchingInfo));
289 if (!c->next)
290 av_log(ctx, AV_LOG_FATAL, "Could not allocate memory");
291 c = c->next;
292 }
293 c->framerateratio = (i+1.0) / 30;
294 c->score = hspace[i][j].score;
295 c->offset = j-90;
296 c->first = hspace[i][j].a;
297 c->second = hspace[i][j].b;
298 c->next = NULL;
299
300 /* not used */
301 c->meandist = 0;
302 c->matchframes = 0;
303 c->whole = 0;
304 }
305 }
306 }
307 }
308 for (i = 0; i < MAX_FRAMERATE; i++) {
309 av_freep(&hspace[i]);
310 }
311 av_freep(&hspace);
312 return cands;
313 }
314
iterate_frame(double frr,FineSignature ** a,FineSignature ** b,int fcount,int * bcount,int dir)315 static int iterate_frame(double frr, FineSignature **a, FineSignature **b, int fcount, int *bcount, int dir)
316 {
317 int step;
318
319 /* between 1 and 2, because frr is between 1 and 2 */
320 step = ((int) 0.5 + fcount * frr) /* current frame */
321 -((int) 0.5 + (fcount-1) * frr);/* last frame */
322
323 if (dir == DIR_NEXT) {
324 if (frr >= 1.0) {
325 if ((*a)->next) {
326 *a = (*a)->next;
327 } else {
328 return DIR_NEXT_END;
329 }
330
331 if (step == 1) {
332 if ((*b)->next) {
333 *b = (*b)->next;
334 (*bcount)++;
335 } else {
336 return DIR_NEXT_END;
337 }
338 } else {
339 if ((*b)->next && (*b)->next->next) {
340 *b = (*b)->next->next;
341 (*bcount)++;
342 } else {
343 return DIR_NEXT_END;
344 }
345 }
346 } else {
347 if ((*b)->next) {
348 *b = (*b)->next;
349 (*bcount)++;
350 } else {
351 return DIR_NEXT_END;
352 }
353
354 if (step == 1) {
355 if ((*a)->next) {
356 *a = (*a)->next;
357 } else {
358 return DIR_NEXT_END;
359 }
360 } else {
361 if ((*a)->next && (*a)->next->next) {
362 *a = (*a)->next->next;
363 } else {
364 return DIR_NEXT_END;
365 }
366 }
367 }
368 return DIR_NEXT;
369 } else {
370 if (frr >= 1.0) {
371 if ((*a)->prev) {
372 *a = (*a)->prev;
373 } else {
374 return DIR_PREV_END;
375 }
376
377 if (step == 1) {
378 if ((*b)->prev) {
379 *b = (*b)->prev;
380 (*bcount)++;
381 } else {
382 return DIR_PREV_END;
383 }
384 } else {
385 if ((*b)->prev && (*b)->prev->prev) {
386 *b = (*b)->prev->prev;
387 (*bcount)++;
388 } else {
389 return DIR_PREV_END;
390 }
391 }
392 } else {
393 if ((*b)->prev) {
394 *b = (*b)->prev;
395 (*bcount)++;
396 } else {
397 return DIR_PREV_END;
398 }
399
400 if (step == 1) {
401 if ((*a)->prev) {
402 *a = (*a)->prev;
403 } else {
404 return DIR_PREV_END;
405 }
406 } else {
407 if ((*a)->prev && (*a)->prev->prev) {
408 *a = (*a)->prev->prev;
409 } else {
410 return DIR_PREV_END;
411 }
412 }
413 }
414 return DIR_PREV;
415 }
416 }
417
evaluate_parameters(AVFilterContext * ctx,SignatureContext * sc,MatchingInfo * infos,MatchingInfo bestmatch,int mode)418 static MatchingInfo evaluate_parameters(AVFilterContext *ctx, SignatureContext *sc, MatchingInfo *infos, MatchingInfo bestmatch, int mode)
419 {
420 int dist, distsum = 0, bcount = 1, dir = DIR_NEXT;
421 int fcount = 0, goodfcount = 0, gooda = 0, goodb = 0;
422 double meandist, minmeandist = bestmatch.meandist;
423 int tolerancecount = 0;
424 FineSignature *a, *b, *aprev, *bprev;
425 int status = STATUS_NULL;
426
427 for (; infos != NULL; infos = infos->next) {
428 a = infos->first;
429 b = infos->second;
430 while (1) {
431 dist = get_l1dist(ctx, sc, a->framesig, b->framesig);
432
433 if (dist > sc->thl1) {
434 if (a->confidence >= 1 || b->confidence >= 1) {
435 /* bad frame (because high different information) */
436 tolerancecount++;
437 }
438
439 if (tolerancecount > 2) {
440 a = aprev;
441 b = bprev;
442 if (dir == DIR_NEXT) {
443 /* turn around */
444 a = infos->first;
445 b = infos->second;
446 dir = DIR_PREV;
447 } else {
448 break;
449 }
450 }
451 } else {
452 /* good frame */
453 distsum += dist;
454 goodfcount++;
455 tolerancecount=0;
456
457 aprev = a;
458 bprev = b;
459
460 if (a->confidence < 1) gooda++;
461 if (b->confidence < 1) goodb++;
462 }
463
464 fcount++;
465
466 dir = iterate_frame(infos->framerateratio, &a, &b, fcount, &bcount, dir);
467 if (dir == DIR_NEXT_END) {
468 status = STATUS_END_REACHED;
469 a = infos->first;
470 b = infos->second;
471 dir = iterate_frame(infos->framerateratio, &a, &b, fcount, &bcount, DIR_PREV);
472 }
473
474 if (dir == DIR_PREV_END) {
475 status |= STATUS_BEGIN_REACHED;
476 break;
477 }
478
479 if (sc->thdi != 0 && bcount >= sc->thdi) {
480 break; /* enough frames found */
481 }
482 }
483
484 if (bcount < sc->thdi)
485 continue; /* matching sequence is too short */
486 if ((double) goodfcount / (double) fcount < sc->thit)
487 continue;
488 if ((double) goodfcount*0.5 < FFMAX(gooda, goodb))
489 continue;
490
491 meandist = (double) goodfcount / (double) distsum;
492
493 if (meandist < minmeandist ||
494 status == STATUS_END_REACHED | STATUS_BEGIN_REACHED ||
495 mode == MODE_FAST){
496 minmeandist = meandist;
497 /* bestcandidate in this iteration */
498 bestmatch.meandist = meandist;
499 bestmatch.matchframes = bcount;
500 bestmatch.framerateratio = infos->framerateratio;
501 bestmatch.score = infos->score;
502 bestmatch.offset = infos->offset;
503 bestmatch.first = infos->first;
504 bestmatch.second = infos->second;
505 bestmatch.whole = 0; /* will be set to true later */
506 bestmatch.next = NULL;
507 }
508
509 /* whole sequence is automatically best match */
510 if (status == (STATUS_END_REACHED | STATUS_BEGIN_REACHED)) {
511 bestmatch.whole = 1;
512 break;
513 }
514
515 /* first matching sequence is enough, finding the best one is not necessary */
516 if (mode == MODE_FAST) {
517 break;
518 }
519 }
520 return bestmatch;
521 }
522
sll_free(MatchingInfo * sll)523 static void sll_free(MatchingInfo *sll)
524 {
525 void *tmp;
526 while (sll) {
527 tmp = sll;
528 sll = sll->next;
529 av_freep(&tmp);
530 }
531 }
532
lookup_signatures(AVFilterContext * ctx,SignatureContext * sc,StreamContext * first,StreamContext * second,int mode)533 static MatchingInfo lookup_signatures(AVFilterContext *ctx, SignatureContext *sc, StreamContext *first, StreamContext *second, int mode)
534 {
535 CoarseSignature *cs, *cs2;
536 MatchingInfo *infos;
537 MatchingInfo bestmatch;
538 MatchingInfo *i;
539
540 cs = first->coarsesiglist;
541 cs2 = second->coarsesiglist;
542
543 /* score of bestmatch is 0, if no match is found */
544 bestmatch.score = 0;
545 bestmatch.meandist = 99999;
546 bestmatch.whole = 0;
547
548 fill_l1distlut(sc->l1distlut);
549
550 /* stage 1: coarsesignature matching */
551 if (find_next_coarsecandidate(sc, second->coarsesiglist, &cs, &cs2, 1) == 0)
552 return bestmatch; /* no candidate found */
553 do {
554 av_log(ctx, AV_LOG_DEBUG, "Stage 1: got coarsesignature pair. "
555 "indices of first frame: %"PRIu32" and %"PRIu32"\n",
556 cs->first->index, cs2->first->index);
557 /* stage 2: l1-distance and hough-transform */
558 av_log(ctx, AV_LOG_DEBUG, "Stage 2: calculate matching parameters\n");
559 infos = get_matching_parameters(ctx, sc, cs->first, cs2->first);
560 if (av_log_get_level() == AV_LOG_DEBUG) {
561 for (i = infos; i != NULL; i = i->next) {
562 av_log(ctx, AV_LOG_DEBUG, "Stage 2: matching pair at %"PRIu32" and %"PRIu32", "
563 "ratio %f, offset %d\n", i->first->index, i->second->index,
564 i->framerateratio, i->offset);
565 }
566 }
567 /* stage 3: evaluation */
568 av_log(ctx, AV_LOG_DEBUG, "Stage 3: evaluate\n");
569 if (infos) {
570 bestmatch = evaluate_parameters(ctx, sc, infos, bestmatch, mode);
571 av_log(ctx, AV_LOG_DEBUG, "Stage 3: best matching pair at %"PRIu32" and %"PRIu32", "
572 "ratio %f, offset %d, score %d, %d frames matching\n",
573 bestmatch.first->index, bestmatch.second->index,
574 bestmatch.framerateratio, bestmatch.offset, bestmatch.score, bestmatch.matchframes);
575 sll_free(infos);
576 }
577 } while (find_next_coarsecandidate(sc, second->coarsesiglist, &cs, &cs2, 0) && !bestmatch.whole);
578 return bestmatch;
579
580 }
581