• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer
2  * Copyright (C) 2003 Benjamin Otte <in7y118@public.uni-hamburg.de>
3  * Copyright (C) 2005-2009 Tim-Philipp Müller <tim centricular net>
4  * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
5  *
6  * gsttypefindfunctions.c: collection of various typefind functions
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public
19  * License along with this library; if not, write to the
20  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #include <glib.h>
29 #include <glib/gprintf.h>
30 
31 /* don't want to add gio xdgmime typefinder if gio was disabled via configure */
32 #ifdef HAVE_GIO
33 #include <gio/gio.h>
34 #define USE_GIO
35 #endif
36 
37 #include <gst/gst.h>
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 
43 #include <gst/pbutils/pbutils.h>
44 #include <gst/base/gstbytereader.h>
45 
46 GST_DEBUG_CATEGORY_STATIC (type_find_debug);
47 #define GST_CAT_DEFAULT type_find_debug
48 
49 /* DataScanCtx: helper for typefind functions that scan through data
50  * step-by-step, to avoid doing a peek at each and every offset */
51 
52 #define DATA_SCAN_CTX_CHUNK_SIZE 4096
53 
54 typedef struct
55 {
56   guint64 offset;
57   const guint8 *data;
58   guint size;
59 } DataScanCtx;
60 
61 static inline void
data_scan_ctx_advance(GstTypeFind * tf,DataScanCtx * c,guint bytes_to_skip)62 data_scan_ctx_advance (GstTypeFind * tf, DataScanCtx * c, guint bytes_to_skip)
63 {
64   c->offset += bytes_to_skip;
65   if (G_LIKELY (c->size > bytes_to_skip)) {
66     c->size -= bytes_to_skip;
67     c->data += bytes_to_skip;
68   } else {
69     c->data += c->size;
70     c->size = 0;
71   }
72 }
73 
74 static inline gboolean
data_scan_ctx_ensure_data(GstTypeFind * tf,DataScanCtx * c,guint min_len)75 data_scan_ctx_ensure_data (GstTypeFind * tf, DataScanCtx * c, guint min_len)
76 {
77   const guint8 *data;
78   guint64 len;
79   guint chunk_len = MAX (DATA_SCAN_CTX_CHUNK_SIZE, min_len);
80 
81   if (G_LIKELY (c->size >= min_len))
82     return TRUE;
83 
84   data = gst_type_find_peek (tf, c->offset, chunk_len);
85   if (G_LIKELY (data != NULL)) {
86     c->data = data;
87     c->size = chunk_len;
88     return TRUE;
89   }
90 
91   /* if there's less than our chunk size, try to get as much as we can, but
92    * always at least min_len bytes (we might be typefinding the first buffer
93    * of the stream and not have as much data available as we'd like) */
94   len = gst_type_find_get_length (tf);
95   if (len > 0) {
96     len = CLAMP (len - c->offset, min_len, chunk_len);
97   } else {
98     len = min_len;
99   }
100 
101   data = gst_type_find_peek (tf, c->offset, len);
102   if (data != NULL) {
103     c->data = data;
104     c->size = len;
105     return TRUE;
106   }
107 
108   return FALSE;
109 }
110 
111 static inline gboolean
data_scan_ctx_memcmp(GstTypeFind * tf,DataScanCtx * c,guint offset,const gchar * data,guint len)112 data_scan_ctx_memcmp (GstTypeFind * tf, DataScanCtx * c, guint offset,
113     const gchar * data, guint len)
114 {
115   if (G_UNLIKELY (offset + len >= G_MAXUINT32))
116     return FALSE;
117 
118   if (!data_scan_ctx_ensure_data (tf, c, offset + len))
119     return FALSE;
120 
121   return (memcmp (c->data + offset, data, len) == 0);
122 }
123 
124 /*** text/plain ***/
125 static gboolean xml_check_first_element (GstTypeFind * tf,
126     const gchar * element, guint elen, gboolean strict);
127 static gboolean sdp_check_header (GstTypeFind * tf);
128 
129 static GstStaticCaps utf8_caps = GST_STATIC_CAPS ("text/plain");
130 
131 #define UTF8_CAPS gst_static_caps_get(&utf8_caps)
132 
133 static gboolean
utf8_type_find_have_valid_utf8_at_offset(GstTypeFind * tf,guint64 offset,GstTypeFindProbability * prob)134 utf8_type_find_have_valid_utf8_at_offset (GstTypeFind * tf, guint64 offset,
135     GstTypeFindProbability * prob)
136 {
137   const guint8 *data;
138 
139   /* randomly decided values */
140   guint min_size = 16;          /* minimum size  */
141   guint size = 32 * 1024;       /* starting size */
142   guint probability = 95;       /* starting probability */
143   guint step = 10;              /* how much we reduce probability in each
144                                  * iteration */
145 
146   while (probability > step && size > min_size) {
147     data = gst_type_find_peek (tf, offset, size);
148     if (data) {
149       gchar *end;
150       gchar *start = (gchar *) data;
151 
152       if (g_utf8_validate (start, size, (const gchar **) &end) || (end - start + 4 > size)) {   /* allow last char to be cut off */
153         *prob = probability;
154         return TRUE;
155       }
156       *prob = 0;
157       return FALSE;
158     }
159     size /= 2;
160     probability -= step;
161   }
162   *prob = 0;
163   return FALSE;
164 }
165 
166 static void
utf8_type_find(GstTypeFind * tf,gpointer unused)167 utf8_type_find (GstTypeFind * tf, gpointer unused)
168 {
169   GstTypeFindProbability start_prob, mid_prob;
170   guint64 length;
171 
172   /* leave xml to the xml typefinders */
173   if (xml_check_first_element (tf, "", 0, TRUE))
174     return;
175 
176   /* leave sdp to the sdp typefinders */
177   if (sdp_check_header (tf))
178     return;
179 
180   /* check beginning of stream */
181   if (!utf8_type_find_have_valid_utf8_at_offset (tf, 0, &start_prob))
182     return;
183 
184   GST_LOG ("start is plain text with probability of %u", start_prob);
185 
186   /* POSSIBLE is the highest probability we ever return if we can't
187    * probe into the middle of the file and don't know its length */
188 
189   length = gst_type_find_get_length (tf);
190   if (length == 0 || length == (guint64) - 1) {
191     gst_type_find_suggest (tf, MIN (start_prob, GST_TYPE_FIND_POSSIBLE),
192         UTF8_CAPS);
193     return;
194   }
195 
196   if (length < 64 * 1024) {
197     gst_type_find_suggest (tf, start_prob, UTF8_CAPS);
198     return;
199   }
200 
201   /* check middle of stream */
202   if (!utf8_type_find_have_valid_utf8_at_offset (tf, length / 2, &mid_prob))
203     return;
204 
205   GST_LOG ("middle is plain text with probability of %u", mid_prob);
206   gst_type_find_suggest (tf, (start_prob + mid_prob) / 2, UTF8_CAPS);
207 }
208 
/*** text/utf-16 and text/utf-32 ***/
/* While UTF-8 is Unicode too, using text/plain for UTF-16 and UTF-32
   is going to break stuff. */
212 
213 typedef struct
214 {
215   size_t bomlen;
216   const char *const bom;
217     gboolean (*checker) (const guint8 *, gint, gint);
218   int boost;
219   int endianness;
220 } GstUnicodeTester;
221 
222 static gboolean
check_utf16(const guint8 * data,gint len,gint endianness)223 check_utf16 (const guint8 * data, gint len, gint endianness)
224 {
225   GstByteReader br;
226   guint16 high, low;
227 
228   low = high = 0;
229 
230   if (len & 1)
231     return FALSE;
232 
233   gst_byte_reader_init (&br, data, len);
234   while (len >= 2) {
235     /* test first for a single 16 bit value in the BMP */
236     if (endianness == G_BIG_ENDIAN)
237       high = gst_byte_reader_get_uint16_be_unchecked (&br);
238     else
239       high = gst_byte_reader_get_uint16_le_unchecked (&br);
240     if (high >= 0xD800 && high <= 0xDBFF) {
241       /* start of a surrogate pair */
242       if (len < 4)
243         return FALSE;
244       len -= 2;
245       if (endianness == G_BIG_ENDIAN)
246         low = gst_byte_reader_get_uint16_be_unchecked (&br);
247       else
248         low = gst_byte_reader_get_uint16_le_unchecked (&br);
249       if (low >= 0xDC00 && low <= 0xDFFF) {
250         /* second half of the surrogate pair */
251       } else
252         return FALSE;
253     } else {
254       if (high >= 0xDC00 && high <= 0xDFFF)
255         return FALSE;
256     }
257     len -= 2;
258   }
259   return TRUE;
260 }
261 
262 static gboolean
check_utf32(const guint8 * data,gint len,gint endianness)263 check_utf32 (const guint8 * data, gint len, gint endianness)
264 {
265   if (len & 3)
266     return FALSE;
267   while (len > 3) {
268     guint32 v;
269     if (endianness == G_BIG_ENDIAN)
270       v = GST_READ_UINT32_BE (data);
271     else
272       v = GST_READ_UINT32_LE (data);
273     if (v >= 0x10FFFF)
274       return FALSE;
275     data += 4;
276     len -= 4;
277   }
278   return TRUE;
279 }
280 
281 static void
unicode_type_find(GstTypeFind * tf,const GstUnicodeTester * tester,guint n_tester,const char * media_type,gboolean require_bom)282 unicode_type_find (GstTypeFind * tf, const GstUnicodeTester * tester,
283     guint n_tester, const char *media_type, gboolean require_bom)
284 {
285   gsize n;
286   gsize len = 4;
287   const guint8 *data = gst_type_find_peek (tf, 0, len);
288   int prob = -1;
289   const gint max_scan_size = 256 * 1024;
290   int endianness = 0;
291 
292   if (!data) {
293     len = 2;
294     data = gst_type_find_peek (tf, 0, len);
295     if (!data)
296       return;
297   }
298 
299   /* find a large enough size that works */
300   while (len < max_scan_size) {
301     size_t newlen = len << 1;
302     const guint8 *newdata = gst_type_find_peek (tf, 0, newlen);
303     if (!newdata)
304       break;
305     len = newlen;
306     data = newdata;
307   }
308 
309   for (n = 0; n < n_tester; ++n) {
310     int bom_boost = 0, tmpprob;
311     if (len >= tester[n].bomlen) {
312       if (!memcmp (data, tester[n].bom, tester[n].bomlen))
313         bom_boost = tester[n].boost;
314     }
315     if (require_bom && bom_boost == 0)
316       continue;
317     if (!(*tester[n].checker) (data, len, tester[n].endianness))
318       continue;
319     tmpprob = GST_TYPE_FIND_POSSIBLE - 20 + bom_boost;
320     if (tmpprob > prob) {
321       prob = tmpprob;
322       endianness = tester[n].endianness;
323     }
324   }
325 
326   if (prob > 0) {
327     GST_DEBUG ("This is valid %s %s", media_type,
328         endianness == G_BIG_ENDIAN ? "be" : "le");
329     gst_type_find_suggest_simple (tf, prob, media_type,
330         "endianness", G_TYPE_INT, endianness, NULL);
331   }
332 }
333 
334 static GstStaticCaps utf16_caps = GST_STATIC_CAPS ("text/utf-16");
335 
336 #define UTF16_CAPS gst_static_caps_get(&utf16_caps)
337 
338 static void
utf16_type_find(GstTypeFind * tf,gpointer unused)339 utf16_type_find (GstTypeFind * tf, gpointer unused)
340 {
341   static const GstUnicodeTester utf16tester[2] = {
342     {2, "\xff\xfe", check_utf16, 10, G_LITTLE_ENDIAN},
343     {2, "\xfe\xff", check_utf16, 20, G_BIG_ENDIAN},
344   };
345   unicode_type_find (tf, utf16tester, G_N_ELEMENTS (utf16tester),
346       "text/utf-16", TRUE);
347 }
348 
349 static GstStaticCaps utf32_caps = GST_STATIC_CAPS ("text/utf-32");
350 
351 #define UTF32_CAPS gst_static_caps_get(&utf32_caps)
352 
353 static void
utf32_type_find(GstTypeFind * tf,gpointer unused)354 utf32_type_find (GstTypeFind * tf, gpointer unused)
355 {
356   static const GstUnicodeTester utf32tester[2] = {
357     {4, "\xff\xfe\x00\x00", check_utf32, 10, G_LITTLE_ENDIAN},
358     {4, "\x00\x00\xfe\xff", check_utf32, 20, G_BIG_ENDIAN}
359   };
360   unicode_type_find (tf, utf32tester, G_N_ELEMENTS (utf32tester),
361       "text/utf-32", TRUE);
362 }
363 
364 /*** text/uri-list ***/
365 
366 static GstStaticCaps uri_caps = GST_STATIC_CAPS ("text/uri-list");
367 
368 #define URI_CAPS (gst_static_caps_get(&uri_caps))
369 #define BUFFER_SIZE 16          /* If the string is < 16 bytes we're screwed */
370 #define INC_BUFFER {                                                    \
371   pos++;                                                                \
372   if (pos == BUFFER_SIZE) {                                             \
373     pos = 0;                                                            \
374     offset += BUFFER_SIZE;                                              \
375     data = gst_type_find_peek (tf, offset, BUFFER_SIZE);                \
376     if (data == NULL) return;                                           \
377   } else {                                                              \
378     data++;                                                             \
379   }                                                                     \
380 }
381 static void
uri_type_find(GstTypeFind * tf,gpointer unused)382 uri_type_find (GstTypeFind * tf, gpointer unused)
383 {
384   const guint8 *data = gst_type_find_peek (tf, 0, BUFFER_SIZE);
385   guint pos = 0;
386   guint offset = 0;
387 
388   if (data) {
389     /* Search for # comment lines */
390     while (*data == '#') {
391       /* Goto end of line */
392       while (*data != '\n') {
393         INC_BUFFER;
394       }
395 
396       INC_BUFFER;
397     }
398 
399     if (!g_ascii_isalpha (*data)) {
400       /* Had a non alpha char - can't be uri-list */
401       return;
402     }
403 
404     INC_BUFFER;
405 
406     while (g_ascii_isalnum (*data)) {
407       INC_BUFFER;
408     }
409 
410     if (*data != ':') {
411       /* First non alpha char is not a : */
412       return;
413     }
414 
415     /* Get the next 2 bytes as well */
416     data = gst_type_find_peek (tf, offset + pos, 3);
417     if (data == NULL)
418       return;
419 
420     if (data[1] != '/' && data[2] != '/') {
421       return;
422     }
423 
424     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, URI_CAPS);
425   }
426 }
427 
428 /*** application/itc ***/
429 static GstStaticCaps itc_caps = GST_STATIC_CAPS ("application/itc");
430 #define ITC_CAPS (gst_static_caps_get(&itc_caps))
431 
432 static void
itc_type_find(GstTypeFind * tf,gpointer unused)433 itc_type_find (GstTypeFind * tf, gpointer unused)
434 {
435   DataScanCtx c = { 0, NULL, 0 };
436   guint8 magic[8] = { 0x00, 0x00, 0x01, 0x1C, 0x69, 0x74, 0x63, 0x68 };
437   guint8 preamble[4] = { 0x00, 0x00, 0x00, 0x02 };
438   guint8 artwork_marker[8] = { 0x00, 0x00, 0x00, 0x00, 0x61, 0x72, 0x74, 0x77 };
439   guint8 item_marker[4] = { 0x69, 0x74, 0x65, 0x6D };
440   GstTypeFindProbability itc_prob = GST_TYPE_FIND_NONE;
441   int i;
442 
443   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
444     return;
445 
446   if (memcmp (c.data, magic, 8))
447     return;
448 
449   /* At least we found the right magic */
450   itc_prob = GST_TYPE_FIND_MINIMUM;
451   data_scan_ctx_advance (tf, &c, 8);
452 
453   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 12)))
454     goto done;
455 
456   /* Check preamble 3 consecutive times */
457   for (i = 0; i < 3; i++) {
458     if (memcmp (c.data, preamble, 4))
459       goto done;
460     data_scan_ctx_advance (tf, &c, 4);
461   }
462 
463   itc_prob = GST_TYPE_FIND_POSSIBLE;
464 
465   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
466     goto done;
467 
468   if (memcmp (c.data, artwork_marker, 8))
469     goto done;
470 
471   itc_prob = GST_TYPE_FIND_LIKELY;
472   data_scan_ctx_advance (tf, &c, 8);
473 
474   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 256)))
475     goto done;
476 
477   /* ...and 256 0x00 padding bytes on what looks like the header's end */
478   for (i = 0; i < 256; i++) {
479     if (c.data[i])
480       goto done;
481   }
482 
483   itc_prob = GST_TYPE_FIND_NEARLY_CERTAIN;
484   data_scan_ctx_advance (tf, &c, 256);
485 
486   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
487     goto done;
488 
489   if (memcmp (c.data + 4, item_marker, 4))
490     goto done;
491 
492   itc_prob = GST_TYPE_FIND_MAXIMUM;
493 
494 done:
495   gst_type_find_suggest (tf, itc_prob, ITC_CAPS);
496 }
497 
498 /*** application/x-hls ***/
499 
500 static GstStaticCaps hls_caps = GST_STATIC_CAPS ("application/x-hls");
501 #define HLS_CAPS (gst_static_caps_get(&hls_caps))
502 
503 /* See http://tools.ietf.org/html/draft-pantos-http-live-streaming-05 */
504 static void
hls_type_find(GstTypeFind * tf,gpointer unused)505 hls_type_find (GstTypeFind * tf, gpointer unused)
506 {
507   DataScanCtx c = { 0, NULL, 0 };
508 
509   /* Minimum useful size is #EXTM3U\n + 1 tag + ':' = 30 bytes */
510   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 30)))
511     return;
512 
513   if (memcmp (c.data, "#EXTM3U", 7))
514     return;
515 
516   data_scan_ctx_advance (tf, &c, 7);
517 
518   /* Check only the first 4KB */
519   while (c.offset < 4096) {
520     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 21)))
521       return;
522 
523     /* Search for # comment lines */
524     if (c.data[0] == '#' && (memcmp (c.data, "#EXT-X-TARGETDURATION", 21) == 0
525             || memcmp (c.data, "#EXT-X-STREAM-INF", 17) == 0
526             || memcmp (c.data, "#EXT-X-MEDIA", 12) == 0)) {
527       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HLS_CAPS);
528       return;
529     }
530 
531     data_scan_ctx_advance (tf, &c, 1);
532   }
533 }
534 
535 
536 /*** application/xml **********************************************************/
537 
538 #define XML_BUFFER_SIZE 16
539 #define XML_INC_BUFFER {                                                \
540   pos++;                                                                \
541   if (pos == XML_BUFFER_SIZE) {                                         \
542     pos = 0;                                                            \
543     offset += XML_BUFFER_SIZE;                                          \
544     data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE);            \
545     if (data == NULL) return FALSE;                                     \
546   } else {                                                              \
547     data++;                                                             \
548   }                                                                     \
549 }
550 
551 #define XML_INC_BUFFER_DATA {                                           \
552   pos++;                                                                \
553   if (pos >= length) {                                                  \
554     return FALSE;                                                       \
555   } else {                                                              \
556     data++;                                                             \
557   }                                                                     \
558 }
559 
560 static gboolean
xml_check_first_element_from_data(const guint8 * data,guint length,const gchar * element,guint elen,gboolean strict)561 xml_check_first_element_from_data (const guint8 * data, guint length,
562     const gchar * element, guint elen, gboolean strict)
563 {
564   gboolean got_xmldec;
565   guint pos = 0;
566 
567   g_return_val_if_fail (data != NULL, FALSE);
568 
569   if (length <= 5)
570     return FALSE;
571 
572   /* look for the XMLDec
573    * see XML spec 2.8, Prolog and Document Type Declaration
574    * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
575   got_xmldec = (memcmp (data, "<?xml", 5) == 0);
576 
577   if (strict && !got_xmldec)
578     return FALSE;
579 
580   /* skip XMLDec in any case if we've got one */
581   if (got_xmldec) {
582     pos += 5;
583     data += 5;
584   }
585 
586   /* look for the first element, it has to be the requested element. Bail
587    * out if it is not within the first 4kB. */
588   while (pos < MIN (4096, length)) {
589     while (*data != '<' && pos < MIN (4096, length)) {
590       XML_INC_BUFFER_DATA;
591     }
592 
593     XML_INC_BUFFER_DATA;
594     if (!g_ascii_isalpha (*data)) {
595       /* if not alphabetic, it's a PI or an element / attribute declaration
596        * like <?xxx or <!xxx */
597       XML_INC_BUFFER_DATA;
598       continue;
599     }
600 
601     /* the first normal element, check if it's the one asked for */
602     if (pos + elen + 1 >= length)
603       return FALSE;
604     return (element && strncmp ((const char *) data, element, elen) == 0);
605   }
606 
607   return FALSE;
608 }
609 
610 static gboolean
xml_check_first_element(GstTypeFind * tf,const gchar * element,guint elen,gboolean strict)611 xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen,
612     gboolean strict)
613 {
614   gboolean got_xmldec;
615   const guint8 *data;
616   guint offset = 0;
617   guint pos = 0;
618 
619   data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE);
620   if (!data)
621     return FALSE;
622 
623   /* look for the XMLDec
624    * see XML spec 2.8, Prolog and Document Type Declaration
625    * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
626   got_xmldec = (memcmp (data, "<?xml", 5) == 0);
627 
628   if (strict && !got_xmldec)
629     return FALSE;
630 
631   /* skip XMLDec in any case if we've got one */
632   if (got_xmldec) {
633     pos += 5;
634     data += 5;
635   }
636 
637   /* look for the first element, it has to be the requested element. Bail
638    * out if it is not within the first 4kB. */
639   while (data && (offset + pos) < 4096) {
640     while (*data != '<' && (offset + pos) < 4096) {
641       XML_INC_BUFFER;
642     }
643 
644     XML_INC_BUFFER;
645     if (!g_ascii_isalpha (*data)) {
646       /* if not alphabetic, it's a PI or an element / attribute declaration
647        * like <?xxx or <!xxx */
648       XML_INC_BUFFER;
649       continue;
650     }
651 
652     /* the first normal element, check if it's the one asked for */
653     data = gst_type_find_peek (tf, offset + pos, elen + 1);
654     return (data && element && strncmp ((char *) data, element, elen) == 0);
655   }
656 
657   return FALSE;
658 }
659 
660 static GstStaticCaps generic_xml_caps = GST_STATIC_CAPS ("application/xml");
661 
662 #define GENERIC_XML_CAPS (gst_static_caps_get(&generic_xml_caps))
663 static void
xml_type_find(GstTypeFind * tf,gpointer unused)664 xml_type_find (GstTypeFind * tf, gpointer unused)
665 {
666   if (xml_check_first_element (tf, "", 0, TRUE)) {
667     gst_type_find_suggest (tf, GST_TYPE_FIND_MINIMUM, GENERIC_XML_CAPS);
668   }
669 }
670 
671 /*** application/dash+xml ****************************************************/
672 
673 static GstStaticCaps dash_caps = GST_STATIC_CAPS ("application/dash+xml");
674 
675 #define DASH_CAPS gst_static_caps_get (&dash_caps)
676 
677 static void
dash_mpd_type_find(GstTypeFind * tf,gpointer unused)678 dash_mpd_type_find (GstTypeFind * tf, gpointer unused)
679 {
680   if (xml_check_first_element (tf, "MPD", 3, FALSE) ||
681       xml_check_first_element (tf, "mpd", 3, FALSE)) {
682     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, DASH_CAPS);
683   }
684 }
685 
686 /*** application/xges ****************************************************/
687 
688 static GstStaticCaps xges_caps = GST_STATIC_CAPS ("application/xges");
689 
690 #define XGES_CAPS gst_static_caps_get (&xges_caps)
691 
692 static void
xges_type_find(GstTypeFind * tf,gpointer unused)693 xges_type_find (GstTypeFind * tf, gpointer unused)
694 {
695   if (xml_check_first_element (tf, "ges", 3, FALSE)) {
696     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, XGES_CAPS);
697   }
698 }
699 
700 
701 /*** application/sdp *********************************************************/
702 
703 static GstStaticCaps sdp_caps = GST_STATIC_CAPS ("application/sdp");
704 
705 #define SDP_CAPS (gst_static_caps_get(&sdp_caps))
706 static gboolean
sdp_check_header(GstTypeFind * tf)707 sdp_check_header (GstTypeFind * tf)
708 {
709   const guint8 *data;
710 
711   data = gst_type_find_peek (tf, 0, 5);
712   if (!data)
713     return FALSE;
714 
715   /* sdp must start with v=0[\r]\n */
716   if (memcmp (data, "v=0", 3))
717     return FALSE;
718 
719   if (data[3] == '\r' && data[4] == '\n')
720     return TRUE;
721   if (data[3] == '\n')
722     return TRUE;
723 
724   return FALSE;
725 }
726 
727 static void
sdp_type_find(GstTypeFind * tf,gpointer unused)728 sdp_type_find (GstTypeFind * tf, gpointer unused)
729 {
730   if (sdp_check_header (tf))
731     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SDP_CAPS);
732 }
733 
734 /*** application/smil *********************************************************/
735 
736 static GstStaticCaps smil_caps = GST_STATIC_CAPS ("application/smil");
737 
738 #define SMIL_CAPS (gst_static_caps_get(&smil_caps))
739 static void
smil_type_find(GstTypeFind * tf,gpointer unused)740 smil_type_find (GstTypeFind * tf, gpointer unused)
741 {
742   if (xml_check_first_element (tf, "smil", 4, FALSE)) {
743     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SMIL_CAPS);
744   }
745 }
746 
747 /*** application/ttml+xml *****************************************************/
748 
749 static GstStaticCaps ttml_xml_caps = GST_STATIC_CAPS ("application/ttml+xml");
750 
751 #define TTML_XML_CAPS (gst_static_caps_get(&ttml_xml_caps))
752 static void
ttml_xml_type_find(GstTypeFind * tf,gpointer unused)753 ttml_xml_type_find (GstTypeFind * tf, gpointer unused)
754 {
755   if (xml_check_first_element (tf, "tt", 2, FALSE)) {
756     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTML_XML_CAPS);
757   }
758 }
759 
760 /*** text/html ***/
761 
762 static GstStaticCaps html_caps = GST_STATIC_CAPS ("text/html");
763 
764 #define HTML_CAPS gst_static_caps_get (&html_caps)
765 
766 static void
html_type_find(GstTypeFind * tf,gpointer unused)767 html_type_find (GstTypeFind * tf, gpointer unused)
768 {
769   const gchar *d, *data;
770 
771   data = (const gchar *) gst_type_find_peek (tf, 0, 16);
772   if (!data)
773     return;
774 
775   if (!g_ascii_strncasecmp (data, "<!DOCTYPE HTML", 14)) {
776     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
777   } else if (xml_check_first_element (tf, "html", 4, FALSE)) {
778     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
779   } else if ((d = memchr (data, '<', 16))) {
780     data = (const gchar *) gst_type_find_peek (tf, d - data, 6);
781     if (data && g_ascii_strncasecmp (data, "<html>", 6) == 0) {
782       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
783     }
784   }
785 }
786 
787 /*** audio/midi ***/
788 
789 static GstStaticCaps mid_caps = GST_STATIC_CAPS ("audio/midi");
790 
791 #define MID_CAPS gst_static_caps_get(&mid_caps)
792 static void
mid_type_find(GstTypeFind * tf,gpointer unused)793 mid_type_find (GstTypeFind * tf, gpointer unused)
794 {
795   const guint8 *data = gst_type_find_peek (tf, 0, 4);
796 
797   /* http://jedi.ks.uiuc.edu/~johns/links/music/midifile.html */
798   if (data && data[0] == 'M' && data[1] == 'T' && data[2] == 'h'
799       && data[3] == 'd')
800     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MID_CAPS);
801 }
802 
803 /*** audio/mobile-xmf ***/
804 
805 static GstStaticCaps mxmf_caps = GST_STATIC_CAPS ("audio/mobile-xmf");
806 
807 #define MXMF_CAPS gst_static_caps_get(&mxmf_caps)
808 static void
mxmf_type_find(GstTypeFind * tf,gpointer unused)809 mxmf_type_find (GstTypeFind * tf, gpointer unused)
810 {
811   const guint8 *data = NULL;
812 
813   /* Search FileId "XMF_" 4 bytes */
814   data = gst_type_find_peek (tf, 0, 4);
815   if (data && data[0] == 'X' && data[1] == 'M' && data[2] == 'F'
816       && data[3] == '_') {
817     /* Search Format version "2.00" 4 bytes */
818     data = gst_type_find_peek (tf, 4, 4);
819     if (data && data[0] == '2' && data[1] == '.' && data[2] == '0'
820         && data[3] == '0') {
821       /* Search TypeId 2     1 byte */
822       data = gst_type_find_peek (tf, 11, 1);
823       if (data && data[0] == 2) {
824         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MXMF_CAPS);
825       }
826     }
827   }
828 }
829 
830 
831 /*** video/x-fli ***/
832 
833 static GstStaticCaps flx_caps = GST_STATIC_CAPS ("video/x-fli");
834 
835 #define FLX_CAPS gst_static_caps_get(&flx_caps)
836 static void
flx_type_find(GstTypeFind * tf,gpointer unused)837 flx_type_find (GstTypeFind * tf, gpointer unused)
838 {
839   const guint8 *data = gst_type_find_peek (tf, 0, 134);
840 
841   if (data) {
842     /* check magic and the frame type of the first frame */
843     if ((data[4] == 0x11 || data[4] == 0x12 ||
844             data[4] == 0x30 || data[4] == 0x44) &&
845         data[5] == 0xaf &&
846         ((data[132] == 0x00 || data[132] == 0xfa) && data[133] == 0xf1)) {
847       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLX_CAPS);
848     }
849     return;
850   }
851   data = gst_type_find_peek (tf, 0, 6);
852   if (data) {
853     /* check magic only */
854     if ((data[4] == 0x11 || data[4] == 0x12 ||
855             data[4] == 0x30 || data[4] == 0x44) && data[5] == 0xaf) {
856       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, FLX_CAPS);
857     }
858     return;
859   }
860 }
861 
862 /*** application/x-id3 ***/
863 
864 static GstStaticCaps id3_caps = GST_STATIC_CAPS ("application/x-id3");
865 
866 #define ID3_CAPS gst_static_caps_get(&id3_caps)
867 static void
id3v2_type_find(GstTypeFind * tf,gpointer unused)868 id3v2_type_find (GstTypeFind * tf, gpointer unused)
869 {
870 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
871  * As a result, the duration value cannot be obtained in the preparation phase.
872  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
873  */
874 #ifdef OHOS_OPT_COMPAT
875   return;
876 #endif
877   const guint8 *data = gst_type_find_peek (tf, 0, 10);
878 
879   if (data && memcmp (data, "ID3", 3) == 0 &&
880       data[3] != 0xFF && data[4] != 0xFF &&
881       (data[6] & 0x80) == 0 && (data[7] & 0x80) == 0 &&
882       (data[8] & 0x80) == 0 && (data[9] & 0x80) == 0) {
883     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
884   }
885 }
886 
887 static void
id3v1_type_find(GstTypeFind * tf,gpointer unused)888 id3v1_type_find (GstTypeFind * tf, gpointer unused)
889 {
890 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
891  * As a result, the duration value cannot be obtained in the preparation phase.
892  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
893  */
894 #ifdef OHOS_OPT_COMPAT
895   return;
896 #endif
897   const guint8 *data = gst_type_find_peek (tf, -128, 3);
898 
899   if (data && memcmp (data, "TAG", 3) == 0) {
900     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
901   }
902 }
903 
/*** application/x-apetag ***/
905 
906 static GstStaticCaps apetag_caps = GST_STATIC_CAPS ("application/x-apetag");
907 
908 #define APETAG_CAPS gst_static_caps_get(&apetag_caps)
909 static void
apetag_type_find(GstTypeFind * tf,gpointer unused)910 apetag_type_find (GstTypeFind * tf, gpointer unused)
911 {
912   const guint8 *data;
913 
914   /* APEv1/2 at start of file */
915   data = gst_type_find_peek (tf, 0, 8);
916   if (data && !memcmp (data, "APETAGEX", 8)) {
917     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
918     return;
919   }
920 
921   /* APEv1/2 at end of file */
922   data = gst_type_find_peek (tf, -32, 8);
923   if (data && !memcmp (data, "APETAGEX", 8)) {
924     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
925     return;
926   }
927 }
928 
929 /*** audio/x-ttafile ***/
930 
931 static GstStaticCaps tta_caps = GST_STATIC_CAPS ("audio/x-ttafile");
932 
933 #define TTA_CAPS gst_static_caps_get(&tta_caps)
934 static void
tta_type_find(GstTypeFind * tf,gpointer unused)935 tta_type_find (GstTypeFind * tf, gpointer unused)
936 {
937   const guint8 *data = gst_type_find_peek (tf, 0, 3);
938 
939   if (data) {
940     if (memcmp (data, "TTA", 3) == 0) {
941       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTA_CAPS);
942       return;
943     }
944   }
945 }
946 
947 /*** audio/x-flac ***/
948 static GstStaticCaps flac_caps = GST_STATIC_CAPS ("audio/x-flac");
949 
950 #define FLAC_CAPS (gst_static_caps_get(&flac_caps))
951 
952 static void
flac_type_find(GstTypeFind * tf,gpointer unused)953 flac_type_find (GstTypeFind * tf, gpointer unused)
954 {
955   DataScanCtx c = { 0, NULL, 0 };
956 
957   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
958     return;
959 
960   /* standard flac (also old/broken flac-in-ogg with an initial 4-byte marker
961    * packet and without the usual packet framing) */
962   if (memcmp (c.data, "fLaC", 4) == 0) {
963     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLAC_CAPS);
964     return;
965   }
966 
967   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
968     return;
969 
970   /* flac-in-ogg, see http://flac.sourceforge.net/ogg_mapping.html */
971   if (memcmp (c.data, "\177FLAC\001", 6) == 0) {
972     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLAC_CAPS);
973     return;
974   }
975 
976 /* disabled because it happily typefinds /dev/urandom as audio/x-flac, and
977  * because I yet have to see header-less flac in the wild */
978 #if 0
979   /* flac without headers (subset format) */
980   /* 64K should be enough */
981   while (c.offset < (64 * 1024)) {
982     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
983       break;
984 
985     /* look for frame header,
986      * http://flac.sourceforge.net/format.html#frame_header
987      */
988     if (c.data[0] == 0xff && (c.data[1] >> 2) == 0x3e) {
989       /* bit 15 in the header must be 0 */
990       if (((c.data[1] >> 1) & 0x01) == 0x01)
991         goto advance;
992 
993       /* blocksize must be != 0x00 */
994       if ((c.data[2] >> 4) == 0x00)
995         goto advance;
996 
997       /* samplerate must be != 0x0f */
998       if ((c.data[2] & 0x0f) == 0x0f)
999         goto advance;
1000       /* also 0 is invalid, as it means get the info from the header and we
1001        * don't have headers if we are here */
1002       if ((c.data[2] & 0x0f) == 0x00)
1003         goto advance;
1004 
1005       /* channel assignment must be < 11 */
1006       if ((c.data[3] >> 4) >= 11)
1007         goto advance;
1008 
1009       /* sample size must be != 0x07 and != 0x05 */
1010       if (((c.data[3] >> 1) & 0x07) == 0x07)
1011         goto advance;
1012       if (((c.data[3] >> 1) & 0x07) == 0x05)
1013         goto advance;
1014       /* also 0 is invalid, as it means get the info from the header and we
1015        * don't have headers if we are here */
1016       if (((c.data[3] >> 1) & 0x07) == 0x00)
1017         goto advance;
1018 
1019       /* next bit must be 0 */
1020       if ((c.data[3] & 0x01) == 0x01)
1021         goto advance;
1022 
1023       /* FIXME: shouldn't we include the crc check ? */
1024 
1025       GST_DEBUG ("Found flac without headers at %d", (gint) c.offset);
1026       gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, FLAC_CAPS);
1027       return;
1028     }
1029   advance:
1030     data_scan_ctx_advance (tf, &c, 1);
1031   }
1032 #endif
1033 }
1034 
1035 /* TODO: we could probably make a generic function for this.. */
1036 static gint
aac_type_find_scan_loas_frames_ep(GstTypeFind * tf,DataScanCtx * scan_ctx,gint max_frames)1037 aac_type_find_scan_loas_frames_ep (GstTypeFind * tf, DataScanCtx * scan_ctx,
1038     gint max_frames)
1039 {
1040   DataScanCtx c = *scan_ctx;
1041   guint16 snc;
1042   guint len;
1043   gint count = 0;
1044 
1045   do {
1046     if (!data_scan_ctx_ensure_data (tf, &c, 5))
1047       break;
1048 
1049     /* EPAudioSyncStream */
1050     len = ((c.data[2] & 0x0f) << 9) | (c.data[3] << 1) |
1051         ((c.data[4] & 0x80) >> 7);
1052 
1053     if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) {
1054       GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1055       break;
1056     }
1057 
1058     /* check length of frame  */
1059     snc = GST_READ_UINT16_BE (c.data + len);
1060     if (snc != 0x4de1) {
1061       GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len);
1062       break;
1063     }
1064 
1065     ++count;
1066 
1067     GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, "
1068         "framelen %u", count, c.offset, len);
1069 
1070     data_scan_ctx_advance (tf, &c, len);
1071   } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024);
1072 
1073   GST_DEBUG ("found %d consecutive frames", count);
1074   return count;
1075 }
1076 
1077 static gint
aac_type_find_scan_loas_frames(GstTypeFind * tf,DataScanCtx * scan_ctx,gint max_frames)1078 aac_type_find_scan_loas_frames (GstTypeFind * tf, DataScanCtx * scan_ctx,
1079     gint max_frames)
1080 {
1081   DataScanCtx c = *scan_ctx;
1082   guint16 snc;
1083   guint len;
1084   gint count = 0;
1085 
1086   do {
1087     if (!data_scan_ctx_ensure_data (tf, &c, 3))
1088       break;
1089 
1090     /* AudioSyncStream */
1091     len = ((c.data[1] & 0x1f) << 8) | c.data[2];
1092     /* add size of sync stream header */
1093     len += 3;
1094 
1095     if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) {
1096       GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1097       break;
1098     }
1099 
1100     /* check length of frame  */
1101     snc = GST_READ_UINT16_BE (c.data + len);
1102     if ((snc & 0xffe0) != 0x56e0) {
1103       GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len);
1104       break;
1105     }
1106 
1107     ++count;
1108 
1109     GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, "
1110         "framelen %u", count, c.offset, len);
1111 
1112     data_scan_ctx_advance (tf, &c, len);
1113   } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024);
1114 
1115   GST_DEBUG ("found %d consecutive frames", count);
1116   return count;
1117 }
1118 
1119 /*** audio/mpeg version 2, 4 ***/
1120 
1121 static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, "
1122     "mpegversion = (int) { 2, 4 }, framed = (bool) false");
1123 #define AAC_CAPS (gst_static_caps_get(&aac_caps))
1124 #define AAC_AMOUNT (4096)
1125 static void
aac_type_find(GstTypeFind * tf,gpointer unused)1126 aac_type_find (GstTypeFind * tf, gpointer unused)
1127 {
1128 /* ohos.opt.compat.0002: the demux of gstplayer does not accurately parse audio resources in the aac format.
1129  * As a result, the duration value cannot be obtained in the preparation phase.
1130  * Use the demux and typefind of ffmpeg to process audio resources in aac format.
1131  */
1132 #ifdef OHOS_OPT_COMPAT
1133   return;
1134 #endif
1135   DataScanCtx c = { 0, NULL, 0 };
1136   GstTypeFindProbability best_probability = GST_TYPE_FIND_NONE;
1137   GstCaps *best_caps = NULL;
1138   gint best_count = 0;
1139 
1140   while (c.offset < AAC_AMOUNT) {
1141     guint snc, len, offset, i;
1142 
1143     /* detect adts header or adif header.
1144      * The ADIF header is 4 bytes, that should be OK. The ADTS header, on
1145      * the other hand, is 14 bits only, so we require one valid frame with
1146      * again a valid syncpoint on the next one (28 bits) for certainty. We
1147      * require 4 kB, which is quite a lot, since frames are generally 200-400
1148      * bytes.
1149      * LOAS has 2 possible syncwords, which are 11 bits and 16 bits long.
1150      * The following stream syntax depends on which one is found.
1151      */
1152     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
1153       break;
1154 
1155     snc = GST_READ_UINT16_BE (c.data);
1156     if (G_UNLIKELY ((snc & 0xfff6) == 0xfff0)) {
1157       /* ADTS header - find frame length */
1158       GST_DEBUG ("Found one ADTS syncpoint at offset 0x%" G_GINT64_MODIFIER
1159           "x, tracing next...", c.offset);
1160       len = ((c.data[3] & 0x03) << 11) |
1161           (c.data[4] << 3) | ((c.data[5] & 0xe0) >> 5);
1162 
1163       if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 6)) {
1164         GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1165         goto next;
1166       }
1167 
1168       offset = len;
1169       /* check if there's a second ADTS frame */
1170       snc = GST_READ_UINT16_BE (c.data + offset);
1171       if ((snc & 0xfff6) == 0xfff0) {
1172         GstCaps *caps;
1173         guint mpegversion, sample_freq_idx, channel_config, profile_idx, rate;
1174         guint8 audio_config[2];
1175 
1176         mpegversion = (c.data[1] & 0x08) ? 2 : 4;
1177         profile_idx = c.data[2] >> 6;
1178         sample_freq_idx = ((c.data[2] & 0x3c) >> 2);
1179         channel_config = ((c.data[2] & 0x01) << 2) + (c.data[3] >> 6);
1180 
1181         GST_DEBUG ("Found second ADTS-%d syncpoint at offset 0x%"
1182             G_GINT64_MODIFIER "x, framelen %u", mpegversion, c.offset, len);
1183 
1184         /* 0xd and 0xe are reserved. 0xf means the sample frequency is directly
1185          * specified in the header, but that's not allowed for ADTS */
1186         if (sample_freq_idx > 0xc) {
1187           GST_DEBUG ("Unexpected sample frequency index %d or wrong sync",
1188               sample_freq_idx);
1189           goto next;
1190         }
1191 
1192         rate = gst_codec_utils_aac_get_sample_rate_from_index (sample_freq_idx);
1193         GST_LOG ("ADTS: profile=%u, rate=%u", profile_idx, rate);
1194 
1195         /* The ADTS frame header is slightly different from the
1196          * AudioSpecificConfig defined for the MPEG-4 container, so we just
1197          * construct enough of it for getting the level here. */
1198         /* ADTS counts profiles from 0 instead of 1 to save bits */
1199         audio_config[0] = (profile_idx + 1) << 3;
1200         audio_config[0] |= (sample_freq_idx >> 1) & 0x7;
1201         audio_config[1] = (sample_freq_idx & 0x1) << 7;
1202         audio_config[1] |= (channel_config & 0xf) << 3;
1203 
1204         caps = gst_caps_new_simple ("audio/mpeg",
1205             "framed", G_TYPE_BOOLEAN, FALSE,
1206             "mpegversion", G_TYPE_INT, mpegversion,
1207             "stream-format", G_TYPE_STRING, "adts", NULL);
1208 
1209         gst_codec_utils_aac_caps_set_level_and_profile (caps, audio_config, 2);
1210 
1211         /* add rate and number of channels if we can */
1212         if (channel_config != 0 && channel_config <= 7) {
1213           const guint channels_map[] = { 0, 1, 2, 3, 4, 5, 6, 8 };
1214 
1215           gst_caps_set_simple (caps, "channels", G_TYPE_INT,
1216               channels_map[channel_config], "rate", G_TYPE_INT, rate, NULL);
1217         }
1218 
1219         /* length of the second ADTS frame */
1220         len = ((c.data[offset + 3] & 0x03) << 11) |
1221             (c.data[offset + 4] << 3) | ((c.data[offset + 5] & 0xe0) >> 5);
1222 
1223         if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, offset + len + 6)) {
1224           GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1225           gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, caps);
1226         } else {
1227           offset += len;
1228           /* find more aac sync to select correctly */
1229           /* check if there's a third/fourth/fifth/sixth ADTS frame, if there is a sixth frame, set probability to maximum:100% */
1230           for (i = 3; i <= 6; i++) {
1231             len = ((c.data[offset + 3] & 0x03) << 11) |
1232                 (c.data[offset + 4] << 3) | ((c.data[offset + 5] & 0xe0) >> 5);
1233             if (len == 0
1234                 || !data_scan_ctx_ensure_data (tf, &c, offset + len + 6)) {
1235               GST_DEBUG ("Wrong sync or next frame not within reach, len=%u",
1236                   len);
1237               break;
1238             }
1239             snc = GST_READ_UINT16_BE (c.data + offset);
1240             if ((snc & 0xfff6) == 0xfff0) {
1241               GST_DEBUG ("Find %und Sync..probability is %u ", i,
1242                   GST_TYPE_FIND_LIKELY + 5 * (i - 2));
1243               offset += len;
1244             } else {
1245               break;
1246             }
1247           }
1248           gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY + 5 * (i - 3), caps);
1249 
1250         }
1251         gst_caps_unref (caps);
1252         break;
1253       }
1254 
1255       GST_DEBUG ("No next frame found... (should have been at 0x%x)", len);
1256     } else if (G_UNLIKELY ((snc & 0xffe0) == 0x56e0 || snc == 0x4de1)) {
1257       gint count;
1258 
1259       /* LOAS frame */
1260       GST_INFO ("Possible LOAS syncword at offset 0x%" G_GINT64_MODIFIER
1261           "x, scanning for more frames...", c.offset);
1262 
1263       if (snc == 0x4de1)
1264         count = aac_type_find_scan_loas_frames_ep (tf, &c, 20);
1265       else
1266         count = aac_type_find_scan_loas_frames (tf, &c, 20);
1267 
1268       if (count >= 3 && count > best_count) {
1269         gst_caps_replace (&best_caps, NULL);
1270         best_caps = gst_caps_new_simple ("audio/mpeg",
1271             "framed", G_TYPE_BOOLEAN, FALSE,
1272             "mpegversion", G_TYPE_INT, 4,
1273             "stream-format", G_TYPE_STRING, "loas", NULL);
1274         best_count = count;
1275         best_probability = GST_TYPE_FIND_POSSIBLE - 10 + count * 3;
1276         if (best_probability >= GST_TYPE_FIND_LIKELY)
1277           break;
1278       }
1279     } else if (!memcmp (c.data, "ADIF", 4)) {
1280       /* ADIF header */
1281       gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, "audio/mpeg",
1282           "framed", G_TYPE_BOOLEAN, FALSE, "mpegversion", G_TYPE_INT, 4,
1283           "stream-format", G_TYPE_STRING, "adif", NULL);
1284       break;
1285     }
1286 
1287   next:
1288 
1289     data_scan_ctx_advance (tf, &c, 1);
1290   }
1291 
1292   if (best_probability > GST_TYPE_FIND_NONE) {
1293     gst_type_find_suggest (tf, best_probability, best_caps);
1294     gst_caps_unref (best_caps);
1295   }
1296 }
1297 
1298 /*** audio/mpeg version 1 ***/
1299 
1300 /*
1301  * The chance that random data is identified as a valid mp3 header is 63 / 2^18
1302  * (0.024%) per try. This makes the function for calculating false positives
1303  *   1 - (1 - ((63 / 2 ^18) ^ GST_MP3_TYPEFIND_MIN_HEADERS)) ^ buffersize)
1304  * This has the following probabilities of false positives:
1305  * datasize               MIN_HEADERS
1306  * (bytes)      1       2       3       4
1307  * 4096         62.6%    0.02%   0%      0%
1308  * 16384        98%      0.09%   0%      0%
1309  * 1 MiB       100%      5.88%   0%      0%
1310  * 1 GiB       100%    100%      1.44%   0%
1311  * 1 TiB       100%    100%    100%      0.35%
1312  * This means that the current choice (3 headers by most of the time 4096 byte
1313  * buffers is pretty safe for now.
1314  *
1315  * The max. size of each frame is 1440 bytes, which means that for N frames to
1316  * be detected, we need 1440 * GST_MP3_TYPEFIND_MIN_HEADERS + 3 bytes of data.
1317  * Assuming we step into the stream right after the frame header, this
1318  * means we need 1440 * (GST_MP3_TYPEFIND_MIN_HEADERS + 1) - 1 + 3 bytes
1319  * of data (5762) to always detect any mp3.
1320  */
1321 
1322 static const guint mp3types_bitrates[2][3][16] =
1323     { {{0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448,},
1324     {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,},
1325     {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320,}},
1326 {{0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,},
1327     {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,},
1328     {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,}},
1329 };
1330 
1331 static const guint mp3types_freqs[3][3] = { {11025, 12000, 8000},
1332 {22050, 24000, 16000},
1333 {44100, 48000, 32000}
1334 };
1335 
1336 static inline guint
mp3_type_frame_length_from_header(guint32 header,guint * put_layer,guint * put_channels,guint * put_bitrate,guint * put_samplerate,gboolean * may_be_free_format,gint possible_free_framelen)1337 mp3_type_frame_length_from_header (guint32 header, guint * put_layer,
1338     guint * put_channels, guint * put_bitrate, guint * put_samplerate,
1339     gboolean * may_be_free_format, gint possible_free_framelen)
1340 {
1341   guint bitrate, layer, length, mode, samplerate, version, channels;
1342 
1343   if ((header & 0xffe00000) != 0xffe00000)
1344     return 0;
1345 
1346   /* we don't need extension, copyright, original or
1347    * emphasis for the frame length */
1348   header >>= 6;
1349 
1350   /* mode */
1351   mode = header & 0x3;
1352   header >>= 3;
1353 
1354   /* padding */
1355   length = header & 0x1;
1356   header >>= 1;
1357 
1358   /* sampling frequency */
1359   samplerate = header & 0x3;
1360   if (samplerate == 3)
1361     return 0;
1362   header >>= 2;
1363 
1364   /* bitrate index */
1365   bitrate = header & 0xF;
1366   if (bitrate == 0 && possible_free_framelen == -1) {
1367     GST_LOG ("Possibly a free format mp3 - signaling");
1368     *may_be_free_format = TRUE;
1369   }
1370   if (bitrate == 15 || (bitrate == 0 && possible_free_framelen == -1))
1371     return 0;
1372 
1373   /* ignore error correction, too */
1374   header >>= 5;
1375 
1376   /* layer */
1377   layer = 4 - (header & 0x3);
1378   if (layer == 4)
1379     return 0;
1380   header >>= 2;
1381 
1382   /* version 0=MPEG2.5; 2=MPEG2; 3=MPEG1 */
1383   version = header & 0x3;
1384   if (version == 1)
1385     return 0;
1386 
1387   /* lookup */
1388   channels = (mode == 3) ? 1 : 2;
1389   samplerate = mp3types_freqs[version > 0 ? version - 1 : 0][samplerate];
1390   if (bitrate == 0) {
1391     /* possible freeform mp3 */
1392     if (layer == 1) {
1393       length *= 4;
1394       length += possible_free_framelen;
1395       bitrate = length * samplerate / 48000;
1396     } else {
1397       length += possible_free_framelen;
1398       bitrate = length * samplerate /
1399           ((layer == 3 && version != 3) ? 72000 : 144000);
1400     }
1401     /* freeform mp3 should have a higher-than-usually-allowed bitrate */
1402     GST_LOG ("calculated bitrate: %u, max usually: %u", bitrate,
1403         mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][14]);
1404     if (bitrate < mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][14])
1405       return 0;
1406   } else {
1407     /* calculating */
1408     bitrate = mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][bitrate];
1409     if (layer == 1) {
1410       length = ((12000 * bitrate / samplerate) + length) * 4;
1411     } else {
1412       length += ((layer == 3
1413               && version != 3) ? 72000 : 144000) * bitrate / samplerate;
1414     }
1415   }
1416 
1417   GST_LOG ("mp3typefind: calculated mp3 frame length of %u bytes", length);
1418   GST_LOG
1419       ("mp3typefind: samplerate = %u - bitrate = %u - layer = %u - version = %u"
1420       " - channels = %u", samplerate, bitrate, layer, version, channels);
1421 
1422   if (put_layer)
1423     *put_layer = layer;
1424   if (put_channels)
1425     *put_channels = channels;
1426   if (put_bitrate)
1427     *put_bitrate = bitrate;
1428   if (put_samplerate)
1429     *put_samplerate = samplerate;
1430 
1431   return length;
1432 }
1433 
1434 
1435 static GstStaticCaps mp3_caps = GST_STATIC_CAPS ("audio/mpeg, "
1436     "mpegversion = (int) 1, layer = (int) [ 1, 3 ]");
1437 #define MP3_CAPS (gst_static_caps_get(&mp3_caps))
1438 /*
1439  * random values for typefinding
1440  * if no more data is available, we will return a probability of
1441  * (found_headers/TRY_HEADERS) * (MAXIMUM * (TRY_SYNC - bytes_skipped)
1442  *        / TRY_SYNC)
1443  * if found_headers >= MIN_HEADERS
1444  */
1445 #define GST_MP3_TYPEFIND_MIN_HEADERS (2)
1446 #define GST_MP3_TYPEFIND_TRY_HEADERS (5)
1447 #define GST_MP3_TYPEFIND_TRY_SYNC (GST_TYPE_FIND_MAXIMUM * 100) /* 10kB */
1448 #define GST_MP3_TYPEFIND_SYNC_SIZE (2048)
1449 #define GST_MP3_WRONG_HEADER (10)
1450 
1451 static void
mp3_type_find_at_offset(GstTypeFind * tf,guint64 start_off,guint * found_layer,GstTypeFindProbability * found_prob)1452 mp3_type_find_at_offset (GstTypeFind * tf, guint64 start_off,
1453     guint * found_layer, GstTypeFindProbability * found_prob)
1454 {
1455   const guint8 *data = NULL;
1456   const guint8 *data_end = NULL;
1457   guint size;
1458   guint64 skipped;
1459   gint last_free_offset = -1;
1460   gint last_free_framelen = -1;
1461   gboolean headerstart = TRUE;
1462 
1463   *found_layer = 0;
1464   *found_prob = 0;
1465 
1466   size = 0;
1467   skipped = 0;
1468   while (skipped < GST_MP3_TYPEFIND_TRY_SYNC) {
1469     if (size <= 0) {
1470       size = GST_MP3_TYPEFIND_SYNC_SIZE * 2;
1471       do {
1472         size /= 2;
1473         data = gst_type_find_peek (tf, skipped + start_off, size);
1474       } while (size > 10 && !data);
1475       if (!data)
1476         break;
1477       data_end = data + size;
1478     }
1479     if (*data == 0xFF) {
1480       const guint8 *head_data = NULL;
1481       guint layer = 0, bitrate, samplerate, channels;
1482       guint found = 0;          /* number of valid headers found */
1483       guint64 offset = skipped;
1484       gboolean changed = FALSE;
1485       guint prev_layer = 0;
1486       guint prev_channels = 0, prev_samplerate = 0;
1487 
1488       while (found < GST_MP3_TYPEFIND_TRY_HEADERS) {
1489         guint32 head;
1490         guint length;
1491         gboolean free = FALSE;
1492 
1493         if ((gint64) (offset - skipped + 4) >= 0 &&
1494             data + offset - skipped + 4 < data_end) {
1495           head_data = data + offset - skipped;
1496         } else {
1497           head_data = gst_type_find_peek (tf, offset + start_off, 4);
1498         }
1499         if (!head_data)
1500           break;
1501         head = GST_READ_UINT32_BE (head_data);
1502         if (!(length = mp3_type_frame_length_from_header (head, &layer,
1503                     &channels, &bitrate, &samplerate, &free,
1504                     last_free_framelen))) {
1505           if (free) {
1506             if (last_free_offset == -1)
1507               last_free_offset = offset;
1508             else {
1509               last_free_framelen = offset - last_free_offset;
1510               offset = last_free_offset;
1511               continue;
1512             }
1513           } else {
1514             last_free_framelen = -1;
1515           }
1516 
1517           /* Mark the fact that we didn't find a valid header at the beginning */
1518           if (found == 0)
1519             headerstart = FALSE;
1520 
1521           GST_LOG ("%d. header at offset %" G_GUINT64_FORMAT
1522               " (0x%" G_GINT64_MODIFIER "x) was not an mp3 header "
1523               "(possibly-free: %s)", found + 1, start_off + offset,
1524               start_off + offset, free ? "yes" : "no");
1525           break;
1526         }
1527         if ((prev_layer && prev_layer != layer) ||
1528             /* (prev_bitrate && prev_bitrate != bitrate) || <-- VBR */
1529             (prev_samplerate && prev_samplerate != samplerate) ||
1530             (prev_channels && prev_channels != channels)) {
1531           /* this means an invalid property, or a change, which might mean
1532            * that this is not a mp3 but just a random bytestream. It could
1533            * be a freaking funky encoded mp3 though. We'll just not count
1534            * this header*/
1535           if (prev_layer)
1536             changed = TRUE;
1537         } else {
1538           found++;
1539           GST_LOG ("found %d. header at offset %" G_GUINT64_FORMAT " (0x%"
1540               G_GINT64_MODIFIER "X)", found, start_off + offset,
1541               start_off + offset);
1542         }
1543         prev_layer = layer;
1544         prev_channels = channels;
1545         prev_samplerate = samplerate;
1546 
1547         offset += length;
1548       }
1549       g_assert (found <= GST_MP3_TYPEFIND_TRY_HEADERS);
1550       if (found != 0 && head_data == NULL &&
1551           gst_type_find_peek (tf, offset + start_off - 1, 1) == NULL)
1552         /* Incomplete last frame - don't count it. */
1553         found--;
1554       if (found == GST_MP3_TYPEFIND_TRY_HEADERS ||
1555           (found >= GST_MP3_TYPEFIND_MIN_HEADERS && head_data == NULL)) {
1556         /* we can make a valid guess */
1557         guint probability = found * GST_TYPE_FIND_MAXIMUM *
1558             (GST_MP3_TYPEFIND_TRY_SYNC - skipped) /
1559             GST_MP3_TYPEFIND_TRY_HEADERS / GST_MP3_TYPEFIND_TRY_SYNC;
1560 
1561         if (!headerstart
1562             && probability > (GST_TYPE_FIND_MINIMUM + GST_MP3_WRONG_HEADER))
1563           probability -= GST_MP3_WRONG_HEADER;
1564         if (probability < GST_TYPE_FIND_MINIMUM)
1565           probability = GST_TYPE_FIND_MINIMUM;
1566         if (start_off > 0)
1567           probability /= 2;
1568         if (!changed)
1569           probability = (probability + GST_TYPE_FIND_MAXIMUM) / 2;
1570 
1571         GST_INFO
1572             ("audio/mpeg calculated %u  =  %u  *  %u / %u  *  (%u - %"
1573             G_GUINT64_FORMAT ") / %u", probability, GST_TYPE_FIND_MAXIMUM,
1574             found, GST_MP3_TYPEFIND_TRY_HEADERS, GST_MP3_TYPEFIND_TRY_SYNC,
1575             (guint64) skipped, GST_MP3_TYPEFIND_TRY_SYNC);
1576         /* make sure we're not id3 tagged */
1577         head_data = gst_type_find_peek (tf, -128, 3);
1578         if (head_data && (memcmp (head_data, "TAG", 3) == 0)) {
1579           probability = 0;
1580         }
1581         g_assert (probability <= GST_TYPE_FIND_MAXIMUM);
1582 
1583         *found_prob = probability;
1584         if (probability > 0)
1585           *found_layer = layer;
1586         return;
1587       }
1588     }
1589     data++;
1590     skipped++;
1591     size--;
1592   }
1593 }
1594 
1595 static void
mp3_type_find(GstTypeFind * tf,gpointer unused)1596 mp3_type_find (GstTypeFind * tf, gpointer unused)
1597 {
1598 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
1599  * As a result, the duration value cannot be obtained in the preparation phase.
1600  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
1601  */
1602 #ifdef OHOS_OPT_COMPAT
1603   return;
1604 #endif
1605   GstTypeFindProbability prob, mid_prob;
1606   const guint8 *data;
1607   guint layer, mid_layer;
1608   guint64 length;
1609 
1610   mp3_type_find_at_offset (tf, 0, &layer, &prob);
1611   length = gst_type_find_get_length (tf);
1612 
1613   if (length == 0 || length == (guint64) - 1) {
1614     if (prob != 0)
1615       goto suggest;
1616     return;
1617   }
1618 
1619   /* if we're pretty certain already, skip the additional check */
1620   if (prob >= GST_TYPE_FIND_LIKELY)
1621     goto suggest;
1622 
1623   mp3_type_find_at_offset (tf, length / 2, &mid_layer, &mid_prob);
1624 
1625   if (mid_prob > 0) {
1626     if (prob == 0) {
1627       GST_LOG ("detected audio/mpeg only in the middle (p=%u)", mid_prob);
1628       layer = mid_layer;
1629       prob = mid_prob;
1630       goto suggest;
1631     }
1632 
1633     if (layer != mid_layer) {
1634       GST_WARNING ("audio/mpeg layer discrepancy: %u vs. %u", layer, mid_layer);
1635       return;                   /* FIXME: or should we just go with the one in the middle? */
1636     }
1637 
1638     /* detected mpeg audio both in middle of the file and at the start */
1639     prob = (prob + mid_prob) / 2;
1640     goto suggest;
1641   }
1642 
1643   /* a valid header right at the start makes it more likely
1644    * that this is actually plain mpeg-1 audio */
1645   if (prob > 0) {
1646     data = gst_type_find_peek (tf, 0, 4);       /* use min. frame size? */
1647     if (data && mp3_type_frame_length_from_header (GST_READ_UINT32_BE (data),
1648             &layer, NULL, NULL, NULL, NULL, 0) != 0) {
1649       prob = MIN (prob + 10, GST_TYPE_FIND_MAXIMUM);
1650     }
1651   }
1652 
1653   if (prob > 0)
1654     goto suggest;
1655 
1656   return;
1657 
1658 suggest:
1659   {
1660     g_return_if_fail (layer >= 1 && layer <= 3);
1661 
1662     gst_type_find_suggest_simple (tf, prob, "audio/mpeg",
1663         "mpegversion", G_TYPE_INT, 1, "layer", G_TYPE_INT, layer,
1664         "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
1665   }
1666 }
1667 
1668 /*** audio/x-musepack ***/
1669 
1670 static GstStaticCaps musepack_caps =
1671 GST_STATIC_CAPS ("audio/x-musepack, streamversion= (int) { 7, 8 }");
1672 
1673 #define MUSEPACK_CAPS (gst_static_caps_get(&musepack_caps))
1674 static void
musepack_type_find(GstTypeFind * tf,gpointer unused)1675 musepack_type_find (GstTypeFind * tf, gpointer unused)
1676 {
1677   const guint8 *data = gst_type_find_peek (tf, 0, 4);
1678   GstTypeFindProbability prop = GST_TYPE_FIND_MINIMUM;
1679   gint streamversion = -1;
1680 
1681   if (data && memcmp (data, "MP+", 3) == 0) {
1682     streamversion = 7;
1683     if ((data[3] & 0x7f) == 7) {
1684       prop = GST_TYPE_FIND_MAXIMUM;
1685     } else {
1686       prop = GST_TYPE_FIND_LIKELY + 10;
1687     }
1688   } else if (data && memcmp (data, "MPCK", 4) == 0) {
1689     streamversion = 8;
1690     prop = GST_TYPE_FIND_MAXIMUM;
1691   }
1692 
1693   if (streamversion != -1) {
1694     gst_type_find_suggest_simple (tf, prop, "audio/x-musepack",
1695         "streamversion", G_TYPE_INT, streamversion, NULL);
1696   }
1697 }
1698 
1699 /*** audio/x-ac3 ***/
1700 /* FIXME 0.11: should be audio/ac3, but isn't for backwards compatibility */
1701 static GstStaticCaps ac3_caps = GST_STATIC_CAPS ("audio/x-ac3");
1702 
1703 #define AC3_CAPS (gst_static_caps_get(&ac3_caps))
1704 
1705 static GstStaticCaps eac3_caps = GST_STATIC_CAPS ("audio/x-eac3");
1706 
1707 #define EAC3_CAPS (gst_static_caps_get(&eac3_caps))
1708 
/* One row of the AC-3 frame size table. */
struct ac3_frmsize
{
  unsigned short bit_rate;      /* nominal bit rate (presumably kbit/s) */
  unsigned short frm_size[3];   /* frame size, indexed by fscod; the AC-3
                                 * typefinder doubles it to get bytes */
};

/* Frame sizes indexed by the 6-bit frmsizecod header field; only codes
 * 0..37 are defined. Every bit rate occupies two consecutive codes that
 * differ only in the middle (fscod == 1) column. */
static const struct ac3_frmsize ac3_frmsizecod_tbl[] = {
  {32, {64, 69, 96}}, {32, {64, 70, 96}},
  {40, {80, 87, 120}}, {40, {80, 88, 120}},
  {48, {96, 104, 144}}, {48, {96, 105, 144}},
  {56, {112, 121, 168}}, {56, {112, 122, 168}},
  {64, {128, 139, 192}}, {64, {128, 140, 192}},
  {80, {160, 174, 240}}, {80, {160, 175, 240}},
  {96, {192, 208, 288}}, {96, {192, 209, 288}},
  {112, {224, 243, 336}}, {112, {224, 244, 336}},
  {128, {256, 278, 384}}, {128, {256, 279, 384}},
  {160, {320, 348, 480}}, {160, {320, 349, 480}},
  {192, {384, 417, 576}}, {192, {384, 418, 576}},
  {224, {448, 487, 672}}, {224, {448, 488, 672}},
  {256, {512, 557, 768}}, {256, {512, 558, 768}},
  {320, {640, 696, 960}}, {320, {640, 697, 960}},
  {384, {768, 835, 1152}}, {384, {768, 836, 1152}},
  {448, {896, 975, 1344}}, {448, {896, 976, 1344}},
  {512, {1024, 1114, 1536}}, {512, {1024, 1115, 1536}},
  {576, {1152, 1253, 1728}}, {576, {1152, 1254, 1728}},
  {640, {1280, 1393, 1920}}, {640, {1280, 1394, 1920}}
};
1755 
1756 static void
ac3_type_find(GstTypeFind * tf,gpointer unused)1757 ac3_type_find (GstTypeFind * tf, gpointer unused)
1758 {
1759   DataScanCtx c = { 0, NULL, 0 };
1760 
1761   /* Search for an ac3 frame; not necessarily right at the start, but give it
1762    * a lower probability if not found right at the start. Check that the
1763    * frame is followed by a second frame at the expected offset.
1764    * We could also check the two ac3 CRCs, but we don't do that right now */
1765   while (c.offset < 1024) {
1766     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
1767       break;
1768 
1769     if (c.data[0] == 0x0b && c.data[1] == 0x77) {
1770       guint bsid = c.data[5] >> 3;
1771 
1772       if (bsid <= 8) {
1773         /* ac3 */
1774         guint fscod = c.data[4] >> 6;
1775         guint frmsizecod = c.data[4] & 0x3f;
1776 
1777         if (fscod < 3 && frmsizecod < 38) {
1778           DataScanCtx c_next = c;
1779           guint frame_size;
1780 
1781           frame_size = ac3_frmsizecod_tbl[frmsizecod].frm_size[fscod];
1782           GST_LOG ("possible AC3 frame sync at offset %"
1783               G_GUINT64_FORMAT ", size=%u", c.offset, frame_size);
1784           if (data_scan_ctx_ensure_data (tf, &c_next, (frame_size * 2) + 5)) {
1785             data_scan_ctx_advance (tf, &c_next, frame_size * 2);
1786 
1787             if (c_next.data[0] == 0x0b && c_next.data[1] == 0x77) {
1788               fscod = c_next.data[4] >> 6;
1789               frmsizecod = c_next.data[4] & 0x3f;
1790 
1791               if (fscod < 3 && frmsizecod < 38) {
1792                 GstTypeFindProbability prob;
1793 
1794                 GST_LOG ("found second AC3 frame (size=%u), looks good",
1795                     ac3_frmsizecod_tbl[frmsizecod].frm_size[fscod]);
1796                 if (c.offset == 0)
1797                   prob = GST_TYPE_FIND_MAXIMUM;
1798                 else
1799                   prob = GST_TYPE_FIND_NEARLY_CERTAIN;
1800 
1801                 gst_type_find_suggest (tf, prob, AC3_CAPS);
1802                 return;
1803               }
1804             } else {
1805               GST_LOG ("no second AC3 frame found, false sync");
1806             }
1807           }
1808         }
1809       } else if (bsid <= 16 && bsid > 10) {
1810         /* eac3 */
1811         DataScanCtx c_next = c;
1812         guint frame_size;
1813 
1814         frame_size = (((c.data[2] & 0x07) << 8) + c.data[3]) + 1;
1815         GST_LOG ("possible E-AC3 frame sync at offset %"
1816             G_GUINT64_FORMAT ", size=%u", c.offset, frame_size);
1817         if (data_scan_ctx_ensure_data (tf, &c_next, (frame_size * 2) + 5)) {
1818           data_scan_ctx_advance (tf, &c_next, frame_size * 2);
1819 
1820           if (c_next.data[0] == 0x0b && c_next.data[1] == 0x77) {
1821             GstTypeFindProbability prob;
1822 
1823             GST_LOG ("found second E-AC3 frame, looks good");
1824             if (c.offset == 0)
1825               prob = GST_TYPE_FIND_MAXIMUM;
1826             else
1827               prob = GST_TYPE_FIND_NEARLY_CERTAIN;
1828 
1829             gst_type_find_suggest (tf, prob, EAC3_CAPS);
1830             return;
1831           } else {
1832             GST_LOG ("no second E-AC3 frame found, false sync");
1833           }
1834         }
1835       } else {
1836         GST_LOG ("invalid AC3 BSID: %u", bsid);
1837       }
1838     }
1839     data_scan_ctx_advance (tf, &c, 1);
1840   }
1841 }
1842 
1843 /*** audio/x-dts ***/
1844 static GstStaticCaps dts_caps = GST_STATIC_CAPS ("audio/x-dts");
1845 #define DTS_CAPS (gst_static_caps_get (&dts_caps))
1846 #define DTS_MIN_FRAMESIZE 96
1847 #define DTS_MAX_FRAMESIZE 18725 /* 16384*16/14 */
1848 
1849 static gboolean
dts_parse_frame_header(DataScanCtx * c,guint * frame_size,guint * sample_rate,guint * channels,guint * depth,guint * endianness)1850 dts_parse_frame_header (DataScanCtx * c, guint * frame_size,
1851     guint * sample_rate, guint * channels, guint * depth, guint * endianness)
1852 {
1853   static const int sample_rates[16] = { 0, 8000, 16000, 32000, 0, 0, 11025,
1854     22050, 44100, 0, 0, 12000, 24000, 48000, 96000, 192000
1855   };
1856   static const guint8 channels_table[16] = { 1, 2, 2, 2, 2, 3, 3, 4, 4, 5,
1857     6, 6, 6, 7, 8, 8
1858   };
1859   guint16 hdr[8];
1860   guint32 marker;
1861   guint num_blocks, chans, lfe, i;
1862 
1863   marker = GST_READ_UINT32_BE (c->data);
1864 
1865   /* raw big endian or 14-bit big endian */
1866   if (marker == 0x7FFE8001 || marker == 0x1FFFE800) {
1867     *endianness = G_BIG_ENDIAN;
1868     for (i = 0; i < G_N_ELEMENTS (hdr); ++i)
1869       hdr[i] = GST_READ_UINT16_BE (c->data + (i * sizeof (guint16)));
1870   } else
1871     /* raw little endian or 14-bit little endian */
1872   if (marker == 0xFE7F0180 || marker == 0xFF1F00E8) {
1873     *endianness = G_LITTLE_ENDIAN;
1874     for (i = 0; i < G_N_ELEMENTS (hdr); ++i)
1875       hdr[i] = GST_READ_UINT16_LE (c->data + (i * sizeof (guint16)));
1876   } else {
1877     return FALSE;
1878   }
1879 
1880   GST_LOG ("dts sync marker 0x%08x at offset %u", marker, (guint) c->offset);
1881 
1882   /* 14-bit mode */
1883   if (marker == 0x1FFFE800 || marker == 0xFF1F00E8) {
1884     if ((hdr[2] & 0xFFF0) != 0x07F0)
1885       return FALSE;
1886     /* discard top 2 bits (2 void), shift in 2 */
1887     hdr[0] = (hdr[0] << 2) | ((hdr[1] >> 12) & 0x0003);
1888     /* discard top 4 bits (2 void, 2 shifted into hdr[0]), shift in 4 etc. */
1889     hdr[1] = (hdr[1] << 4) | ((hdr[2] >> 10) & 0x000F);
1890     hdr[2] = (hdr[2] << 6) | ((hdr[3] >> 8) & 0x003F);
1891     hdr[3] = (hdr[3] << 8) | ((hdr[4] >> 6) & 0x00FF);
1892     hdr[4] = (hdr[4] << 10) | ((hdr[5] >> 4) & 0x03FF);
1893     hdr[5] = (hdr[5] << 12) | ((hdr[6] >> 2) & 0x0FFF);
1894     hdr[6] = (hdr[6] << 14) | ((hdr[7] >> 0) & 0x3FFF);
1895     g_assert (hdr[0] == 0x7FFE && hdr[1] == 0x8001);
1896     *depth = 14;
1897   } else {
1898     *depth = 16;
1899   }
1900 
1901   GST_LOG ("frame header: %04x%04x%04x%04x", hdr[2], hdr[3], hdr[4], hdr[5]);
1902 
1903   num_blocks = (hdr[2] >> 2) & 0x7F;
1904   *frame_size = (((hdr[2] & 0x03) << 12) | (hdr[3] >> 4)) + 1;
1905   chans = ((hdr[3] & 0x0F) << 2) | (hdr[4] >> 14);
1906   *sample_rate = sample_rates[(hdr[4] >> 10) & 0x0F];
1907   lfe = (hdr[5] >> 9) & 0x03;
1908 
1909   if (num_blocks < 5 || *frame_size < 96 || *sample_rate == 0)
1910     return FALSE;
1911 
1912   if (marker == 0x1FFFE800 || marker == 0xFF1F00E8)
1913     *frame_size = (*frame_size * 16) / 14;      /* FIXME: round up? */
1914 
1915   if (chans < G_N_ELEMENTS (channels_table))
1916     *channels = channels_table[chans] + ((lfe) ? 1 : 0);
1917   else
1918     *channels = 0;
1919 
1920   return TRUE;
1921 }
1922 
1923 static void
dts_type_find(GstTypeFind * tf,gpointer unused)1924 dts_type_find (GstTypeFind * tf, gpointer unused)
1925 {
1926   DataScanCtx c = { 0, NULL, 0 };
1927 
1928   /* Search for an dts frame; not necessarily right at the start, but give it
1929    * a lower probability if not found right at the start. Check that the
1930    * frame is followed by a second frame at the expected offset. */
1931   while (c.offset <= DTS_MAX_FRAMESIZE) {
1932     guint frame_size = 0, rate = 0, chans = 0, depth = 0, endianness = 0;
1933 
1934     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, DTS_MIN_FRAMESIZE)))
1935       return;
1936 
1937     if (G_UNLIKELY (dts_parse_frame_header (&c, &frame_size, &rate, &chans,
1938                 &depth, &endianness))) {
1939       GstTypeFindProbability prob;
1940       DataScanCtx next_c;
1941 
1942       prob = (c.offset == 0) ? GST_TYPE_FIND_LIKELY : GST_TYPE_FIND_POSSIBLE;
1943 
1944       /* check for second frame sync */
1945       next_c = c;
1946       data_scan_ctx_advance (tf, &next_c, frame_size);
1947       if (data_scan_ctx_ensure_data (tf, &next_c, 4)) {
1948         GST_LOG ("frame size: %u 0x%04x", frame_size, frame_size);
1949         GST_MEMDUMP ("second frame sync", next_c.data, 4);
1950         if (GST_READ_UINT32_BE (c.data) == GST_READ_UINT32_BE (next_c.data))
1951           prob = GST_TYPE_FIND_MAXIMUM;
1952       }
1953 
1954       if (chans > 0) {
1955         gst_type_find_suggest_simple (tf, prob, "audio/x-dts",
1956             "rate", G_TYPE_INT, rate, "channels", G_TYPE_INT, chans,
1957             "depth", G_TYPE_INT, depth, "endianness", G_TYPE_INT, endianness,
1958             "framed", G_TYPE_BOOLEAN, FALSE, NULL);
1959       } else {
1960         gst_type_find_suggest_simple (tf, prob, "audio/x-dts",
1961             "rate", G_TYPE_INT, rate, "depth", G_TYPE_INT, depth,
1962             "endianness", G_TYPE_INT, endianness,
1963             "framed", G_TYPE_BOOLEAN, FALSE, NULL);
1964       }
1965 
1966       return;
1967     }
1968 
1969     data_scan_ctx_advance (tf, &c, 1);
1970   }
1971 }
1972 
1973 /*** gsm ***/
1974 
1975 /* can only be detected by using the extension, in which case we use the default
1976  * GSM properties */
1977 static GstStaticCaps gsm_caps =
1978 GST_STATIC_CAPS ("audio/x-gsm, rate=8000, channels=1");
1979 
1980 #define GSM_CAPS (gst_static_caps_get(&gsm_caps))
1981 
1982 /*** wavpack ***/
1983 
1984 static GstStaticCaps wavpack_caps =
1985 GST_STATIC_CAPS ("audio/x-wavpack, framed = (boolean) false");
1986 
1987 #define WAVPACK_CAPS (gst_static_caps_get(&wavpack_caps))
1988 
1989 static GstStaticCaps wavpack_correction_caps =
1990 GST_STATIC_CAPS ("audio/x-wavpack-correction, framed = (boolean) false");
1991 
1992 #define WAVPACK_CORRECTION_CAPS (gst_static_caps_get(&wavpack_correction_caps))
1993 
1994 static void
wavpack_type_find(GstTypeFind * tf,gpointer unused)1995 wavpack_type_find (GstTypeFind * tf, gpointer unused)
1996 {
1997   GstTypeFindProbability base_prob = GST_TYPE_FIND_POSSIBLE;
1998   guint64 offset;
1999   guint32 blocksize;
2000   const guint8 *data;
2001   guint count_wv, count_wvc;
2002 
2003   data = gst_type_find_peek (tf, 0, 32);
2004   if (!data)
2005     return;
2006 
2007   if (data[0] != 'w' || data[1] != 'v' || data[2] != 'p' || data[3] != 'k')
2008     return;
2009 
2010   /* Note: wavpack blocks can be fairly large (easily 60-110k), possibly
2011    * larger than the max. limits imposed by certain typefinding elements
2012    * like id3demux or apedemux, so typefinding is most likely only going to
2013    * work in pull-mode */
2014   blocksize = GST_READ_UINT32_LE (data + 4);
2015   GST_LOG ("wavpack header, blocksize=0x%04x", blocksize);
2016   /* If bigger than maximum allowed blocksize, refuse */
2017   if (blocksize > 131072)
2018     return;
2019   count_wv = 0;
2020   count_wvc = 0;
2021   offset = 32;
2022   while (offset < 8 + blocksize) {
2023     guint32 sublen;
2024 
2025     /* get chunk header */
2026     GST_LOG ("peeking at chunk at offset 0x%04x", (guint) offset);
2027     data = gst_type_find_peek (tf, offset, 4);
2028     if (data == NULL)
2029       break;
2030     sublen = ((guint32) data[1]) << 1;
2031     if (data[0] & 0x80) {
2032       sublen |= (((guint32) data[2]) << 9) | (((guint32) data[3]) << 17);
2033       sublen += 1 + 3;          /* id + length */
2034     } else {
2035       sublen += 1 + 1;          /* id + length */
2036     }
2037     if (offset + sublen > 8 + blocksize) {
2038       GST_LOG ("chunk length too big (%u > %" G_GUINT64_FORMAT ")", sublen,
2039           blocksize - offset);
2040       break;
2041     }
2042     if ((data[0] & 0x20) == 0) {
2043       switch (data[0] & 0x0f) {
2044         case 0xa:              /* ID_WV_BITSTREAM  */
2045         case 0xc:              /* ID_WVX_BITSTREAM */
2046           ++count_wv;
2047           break;
2048         case 0xb:              /* ID_WVC_BITSTREAM */
2049           ++count_wvc;
2050           break;
2051         default:
2052           break;
2053       }
2054       if (count_wv >= 5 || count_wvc >= 5)
2055         break;
2056     }
2057     offset += sublen;
2058   }
2059 
2060   /* check for second block header */
2061   data = gst_type_find_peek (tf, 8 + blocksize, 4);
2062   if (data != NULL && memcmp (data, "wvpk", 4) == 0) {
2063     GST_DEBUG ("found second block sync");
2064     base_prob = GST_TYPE_FIND_LIKELY;
2065   }
2066 
2067   GST_DEBUG ("wvc=%d, wv=%d", count_wvc, count_wv);
2068 
2069   if (count_wvc > 0 && count_wvc > count_wv) {
2070     gst_type_find_suggest (tf,
2071         MIN (base_prob + 5 * count_wvc, GST_TYPE_FIND_NEARLY_CERTAIN),
2072         WAVPACK_CORRECTION_CAPS);
2073   } else if (count_wv > 0) {
2074     gst_type_find_suggest (tf,
2075         MIN (base_prob + 5 * count_wv, GST_TYPE_FIND_NEARLY_CERTAIN),
2076         WAVPACK_CAPS);
2077   }
2078 }
2079 
2080 /*** application/postscrip ***/
2081 static GstStaticCaps postscript_caps =
2082 GST_STATIC_CAPS ("application/postscript");
2083 
2084 #define POSTSCRIPT_CAPS (gst_static_caps_get(&postscript_caps))
2085 
2086 static void
postscript_type_find(GstTypeFind * tf,gpointer unused)2087 postscript_type_find (GstTypeFind * tf, gpointer unused)
2088 {
2089   const guint8 *data = gst_type_find_peek (tf, 0, 3);
2090   if (!data)
2091     return;
2092 
2093   if (data[0] == 0x04)
2094     data++;
2095   if (data[0] == '%' && data[1] == '!')
2096     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, POSTSCRIPT_CAPS);
2097 
2098 }
2099 
2100 /*** image/svg+xml ***/
2101 static GstStaticCaps svg_caps = GST_STATIC_CAPS ("image/svg+xml");
2102 
2103 #define SVG_CAPS (gst_static_caps_get(&svg_caps))
2104 
2105 static void
svg_type_find(GstTypeFind * tf,gpointer unused)2106 svg_type_find (GstTypeFind * tf, gpointer unused)
2107 {
2108   static const gchar svg_doctype[] = "!DOCTYPE svg";
2109   static const gchar svg_tag[] = "<svg";
2110   DataScanCtx c = { 0, NULL, 0 };
2111 
2112   while (c.offset <= 1024) {
2113     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 12)))
2114       break;
2115 
2116     if (memcmp (svg_doctype, c.data, 12) == 0) {
2117       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SVG_CAPS);
2118       return;
2119     } else if (memcmp (svg_tag, c.data, 4) == 0) {
2120       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, SVG_CAPS);
2121       return;
2122     }
2123     data_scan_ctx_advance (tf, &c, 1);
2124   }
2125 }
2126 
2127 /*** multipart/x-mixed-replace mimestream ***/
2128 
2129 static GstStaticCaps multipart_caps =
2130 GST_STATIC_CAPS ("multipart/x-mixed-replace");
2131 #define MULTIPART_CAPS gst_static_caps_get(&multipart_caps)
2132 
2133 /* multipart/x-mixed replace is:
2134  *   <maybe some whitespace>--<some ascii chars>[\r]\n
2135  *   <more ascii chars>[\r]\nContent-type:<more ascii>[\r]\n */
2136 static void
multipart_type_find(GstTypeFind * tf,gpointer unused)2137 multipart_type_find (GstTypeFind * tf, gpointer unused)
2138 {
2139   const guint8 *data;
2140   const guint8 *x;
2141 
2142 #define MULTIPART_MAX_BOUNDARY_OFFSET 16
2143   data = gst_type_find_peek (tf, 0, MULTIPART_MAX_BOUNDARY_OFFSET);
2144   if (!data)
2145     return;
2146 
2147   for (x = data;
2148       x - data < MULTIPART_MAX_BOUNDARY_OFFSET - 2 && g_ascii_isspace (*x);
2149       x++);
2150   if (x[0] != '-' || x[1] != '-')
2151     return;
2152 
2153   /* Could be okay, peek what should be enough for a complete header */
2154 #define MULTIPART_MAX_HEADER_SIZE 256
2155   data = gst_type_find_peek (tf, 0, MULTIPART_MAX_HEADER_SIZE);
2156   if (!data)
2157     return;
2158 
2159   for (x = data; x - data < MULTIPART_MAX_HEADER_SIZE - 14; x++) {
2160     if (!isascii (*x)) {
2161       return;
2162     }
2163     if (*x == '\n' &&
2164         !g_ascii_strncasecmp ("content-type:", (gchar *) x + 1, 13)) {
2165       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MULTIPART_CAPS);
2166       return;
2167     }
2168   }
2169 }
2170 
2171 /*** video/mpeg systemstream ***/
2172 static GstStaticCaps mpeg_sys_caps = GST_STATIC_CAPS ("video/mpeg, "
2173     "systemstream = (boolean) true, mpegversion = (int) [ 1, 2 ]");
2174 
2175 #define MPEG_SYS_CAPS gst_static_caps_get(&mpeg_sys_caps)
2176 #define IS_MPEG_HEADER(data) (G_UNLIKELY((((guint8 *)(data))[0] == 0x00) &&  \
2177                                          (((guint8 *)(data))[1] == 0x00) &&  \
2178                                          (((guint8 *)(data))[2] == 0x01)))
2179 
2180 #define IS_MPEG_PACK_CODE(b) ((b) == 0xBA)
2181 #define IS_MPEG_SYS_CODE(b) ((b) == 0xBB)
2182 #define IS_MPEG_PACK_HEADER(data)       (IS_MPEG_HEADER (data) &&            \
2183                                          IS_MPEG_PACK_CODE (((guint8 *)(data))[3]))
2184 
2185 #define IS_MPEG_PES_CODE(b) (((b) & 0xF0) == 0xE0 || ((b) & 0xF0) == 0xC0 || \
2186                              (b) >= 0xBD)
2187 #define IS_MPEG_PES_HEADER(data)        (IS_MPEG_HEADER (data) &&            \
2188                                          IS_MPEG_PES_CODE (((guint8 *)(data))[3]))
2189 
2190 #define MPEG2_MAX_PROBE_LENGTH (128 * 1024)     /* 128kB should be 64 packs of the
2191                                                  * most common 2kB pack size. */
2192 
2193 #define MPEG2_MIN_SYS_HEADERS 2
2194 #define MPEG2_MAX_SYS_HEADERS 5
2195 
2196 static gboolean
mpeg_sys_is_valid_pack(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2197 mpeg_sys_is_valid_pack (GstTypeFind * tf, const guint8 * data, guint len,
2198     guint * pack_size)
2199 {
2200   /* Check the pack header @ offset for validity, assuming that the 4 byte header
2201    * itself has already been checked. */
2202   guint8 stuff_len;
2203 
2204   if (len < 12)
2205     return FALSE;
2206 
2207   /* Check marker bits */
2208   if ((data[4] & 0xC4) == 0x44) {
2209     /* MPEG-2 PACK */
2210     if (len < 14)
2211       return FALSE;
2212 
2213     if ((data[6] & 0x04) != 0x04 ||
2214         (data[8] & 0x04) != 0x04 ||
2215         (data[9] & 0x01) != 0x01 || (data[12] & 0x03) != 0x03)
2216       return FALSE;
2217 
2218     stuff_len = data[13] & 0x07;
2219 
2220     /* Check the following header bytes, if we can */
2221     if ((14 + stuff_len + 4) <= len) {
2222       if (!IS_MPEG_HEADER (data + 14 + stuff_len))
2223         return FALSE;
2224     }
2225     if (pack_size)
2226       *pack_size = 14 + stuff_len;
2227     return TRUE;
2228   } else if ((data[4] & 0xF1) == 0x21) {
2229     /* MPEG-1 PACK */
2230     if ((data[6] & 0x01) != 0x01 ||
2231         (data[8] & 0x01) != 0x01 ||
2232         (data[9] & 0x80) != 0x80 || (data[11] & 0x01) != 0x01)
2233       return FALSE;
2234 
2235     /* Check the following header bytes, if we can */
2236     if ((12 + 4) <= len) {
2237       if (!IS_MPEG_HEADER (data + 12))
2238         return FALSE;
2239     }
2240     if (pack_size)
2241       *pack_size = 12;
2242     return TRUE;
2243   }
2244 
2245   return FALSE;
2246 }
2247 
2248 static gboolean
mpeg_sys_is_valid_pes(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2249 mpeg_sys_is_valid_pes (GstTypeFind * tf, const guint8 * data, guint len,
2250     guint * pack_size)
2251 {
2252   guint pes_packet_len;
2253 
2254   /* Check the PES header at the given position, assuming the header code itself
2255    * was already checked */
2256   if (len < 6)
2257     return FALSE;
2258 
2259   /* For MPEG Program streams, unbounded PES is not allowed, so we must have a
2260    * valid length present */
2261   pes_packet_len = GST_READ_UINT16_BE (data + 4);
2262   if (pes_packet_len == 0)
2263     return FALSE;
2264 
2265   /* Check the following header, if we can */
2266   if (6 + pes_packet_len + 4 <= len) {
2267     if (!IS_MPEG_HEADER (data + 6 + pes_packet_len))
2268       return FALSE;
2269   }
2270 
2271   if (pack_size)
2272     *pack_size = 6 + pes_packet_len;
2273   return TRUE;
2274 }
2275 
2276 static gboolean
mpeg_sys_is_valid_sys(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2277 mpeg_sys_is_valid_sys (GstTypeFind * tf, const guint8 * data, guint len,
2278     guint * pack_size)
2279 {
2280   guint sys_hdr_len;
2281 
2282   /* Check the System header at the given position, assuming the header code itself
2283    * was already checked */
2284   if (len < 6)
2285     return FALSE;
2286   sys_hdr_len = GST_READ_UINT16_BE (data + 4);
2287   if (sys_hdr_len < 6)
2288     return FALSE;
2289 
2290   /* Check the following header, if we can */
2291   if (6 + sys_hdr_len + 4 <= len) {
2292     if (!IS_MPEG_HEADER (data + 6 + sys_hdr_len))
2293       return FALSE;
2294   }
2295 
2296   if (pack_size)
2297     *pack_size = 6 + sys_hdr_len;
2298 
2299   return TRUE;
2300 }
2301 
2302 /* calculation of possibility to identify random data as mpeg systemstream:
2303  * bits that must match in header detection:            32 (or more)
2304  * chance that random data is identifed:                1/2^32
2305  * chance that MPEG2_MIN_PACK_HEADERS headers are identified:
2306  *       1/2^(32*MPEG2_MIN_PACK_HEADERS)
2307  * chance that this happens in MPEG2_MAX_PROBE_LENGTH bytes:
2308  *       1-(1+1/2^(32*MPEG2_MIN_PACK_HEADERS)^MPEG2_MAX_PROBE_LENGTH)
2309  * for current values:
2310  *       1-(1+1/2^(32*4)^101024)
2311  *       = <some_number>
2312  * Since we also check marker bits and pes packet lengths, this probability is a
2313  * very coarse upper bound.
2314  */
2315 static void
mpeg_sys_type_find(GstTypeFind * tf,gpointer unused)2316 mpeg_sys_type_find (GstTypeFind * tf, gpointer unused)
2317 {
2318   const guint8 *data, *data0, *first_sync, *end;
2319   gint mpegversion = 0;
2320   guint pack_headers = 0;
2321   guint pes_headers = 0;
2322   guint pack_size;
2323   guint since_last_sync = 0;
2324   guint32 sync_word = 0xffffffff;
2325   guint potential_headers = 0;
2326 
2327   G_STMT_START {
2328     gint len;
2329 
2330     len = MPEG2_MAX_PROBE_LENGTH;
2331 
2332     while (len >= 16) {
2333       data = gst_type_find_peek (tf, 0, 5 + len);
2334       if (data != NULL)
2335         break;
2336       len = len / 2;
2337     }
2338 
2339     if (!data)
2340       return;
2341 
2342     end = data + len;
2343   }
2344   G_STMT_END;
2345 
2346   data0 = data;
2347   first_sync = NULL;
2348 
2349   while (data < end) {
2350     sync_word <<= 8;
2351     if (sync_word == 0x00000100) {
2352       /* Found potential sync word */
2353       if (first_sync == NULL)
2354         first_sync = data - 3;
2355 
2356       if (since_last_sync > 4) {
2357         /* If more than 4 bytes since the last sync word, reset our counters,
2358          * as we're only interested in counting contiguous packets */
2359         pes_headers = pack_headers = 0;
2360       }
2361       pack_size = 0;
2362 
2363       potential_headers++;
2364       if (IS_MPEG_PACK_CODE (data[0])) {
2365         if ((data[1] & 0xC0) == 0x40) {
2366           /* MPEG-2 */
2367           mpegversion = 2;
2368         } else if ((data[1] & 0xF0) == 0x20) {
2369           mpegversion = 1;
2370         }
2371         if (mpegversion != 0 &&
2372             mpeg_sys_is_valid_pack (tf, data - 3, end - data + 3, &pack_size)) {
2373           pack_headers++;
2374         }
2375       } else if (IS_MPEG_PES_CODE (data[0])) {
2376         /* PES stream */
2377         if (mpeg_sys_is_valid_pes (tf, data - 3, end - data + 3, &pack_size)) {
2378           pes_headers++;
2379           if (mpegversion == 0)
2380             mpegversion = 2;
2381         }
2382       } else if (IS_MPEG_SYS_CODE (data[0])) {
2383         if (mpeg_sys_is_valid_sys (tf, data - 3, end - data + 3, &pack_size)) {
2384           pack_headers++;
2385         }
2386       }
2387 
2388       /* If we found a packet with a known size, skip the bytes in it and loop
2389        * around to check the next packet. */
2390       if (pack_size != 0) {
2391         data += pack_size - 3;
2392         sync_word = 0xffffffff;
2393         since_last_sync = 0;
2394         continue;
2395       }
2396     }
2397 
2398     sync_word |= data[0];
2399     since_last_sync++;
2400     data++;
2401 
2402     /* If we have found MAX headers, and *some* were pes headers (pack headers
2403      * are optional in an mpeg system stream) then return our high-probability
2404      * result */
2405     if (pes_headers > 0 && (pack_headers + pes_headers) > MPEG2_MAX_SYS_HEADERS)
2406       goto suggest;
2407   }
2408 
2409   /* If we at least saw MIN headers, and *some* were pes headers (pack headers
2410    * are optional in an mpeg system stream) then return a lower-probability
2411    * result */
2412   if (pes_headers > 0 && (pack_headers + pes_headers) > MPEG2_MIN_SYS_HEADERS)
2413     goto suggest;
2414 
2415   return;
2416 suggest:
2417   {
2418     guint prob;
2419 
2420     prob = GST_TYPE_FIND_POSSIBLE + (10 * (pack_headers + pes_headers));
2421     prob = MIN (prob, GST_TYPE_FIND_MAXIMUM);
2422 
2423     /* With the above test, we get into problems when we try to typefind
2424        a MPEG stream from a small amount of data, which can happen when
2425        we get data pushed from a HTTP source. We thus make a second test
2426        to give higher probability if all the potential headers were either
2427        pack or pes headers (ie, no potential header was unrecognized). */
2428     if (potential_headers == pack_headers + pes_headers) {
2429       GST_LOG ("Only %u headers, but all were recognized", potential_headers);
2430       prob += 10;
2431       prob = MIN (prob, GST_TYPE_FIND_MAXIMUM);
2432     }
2433 
2434     /* lower probability if the first packet wasn't right at the start */
2435     if (data0 != first_sync && prob >= 10)
2436       prob -= 10;
2437 
2438     GST_LOG ("Suggesting MPEG %d system stream, %d packs, %d pes, prob %u%%",
2439         mpegversion, pack_headers, pes_headers, prob);
2440 
2441     gst_type_find_suggest_simple (tf, prob, "video/mpeg",
2442         "systemstream", G_TYPE_BOOLEAN, TRUE,
2443         "mpegversion", G_TYPE_INT, mpegversion, NULL);
2444   }
2445 };
2446 
2447 /*** video/mpegts Transport Stream ***/
2448 static GstStaticCaps mpegts_caps = GST_STATIC_CAPS ("video/mpegts, "
2449     "systemstream = (boolean) true, packetsize = (int) [ 188, 208 ]");
2450 #define MPEGTS_CAPS gst_static_caps_get(&mpegts_caps)
2451 
2452 #define GST_MPEGTS_TYPEFIND_MIN_HEADERS 4
2453 #define GST_MPEGTS_TYPEFIND_MAX_HEADERS 10
2454 #define GST_MPEGTS_MAX_PACKET_SIZE 208
2455 #define GST_MPEGTS_TYPEFIND_SYNC_SIZE \
2456             (GST_MPEGTS_TYPEFIND_MIN_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
2457 #define GST_MPEGTS_TYPEFIND_MAX_SYNC \
2458             (GST_MPEGTS_TYPEFIND_MAX_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
2459 #define GST_MPEGTS_TYPEFIND_SCAN_LENGTH \
2460             (GST_MPEGTS_TYPEFIND_MAX_SYNC * 4)
2461 
2462 #define MPEGTS_HDR_SIZE 4
2463 /* Check for sync byte, error_indicator == 0 and packet has payload.
2464  * Adaptation control field (data[3] & 0x30) may be zero for TS packets with
2465  * null PIDs. Still, these streams are valid TS streams (for null packets,
2466  * AFC is supposed to be 0x1, but the spec also says decoders should just
2467  * discard any packets with AFC = 0x00) */
2468 #define IS_MPEGTS_HEADER(data) (data[0] == 0x47 && \
2469                                 (data[1] & 0x80) == 0x00 && \
2470                                 ((data[3] & 0x30) != 0x00 || \
2471                                 ((data[3] & 0x30) == 0x00 && (data[1] & 0x1f) == 0x1f && (data[2] & 0xff) == 0xff)))
2472 
2473 /* Helper function to search ahead at intervals of packet_size for mpegts
2474  * headers */
2475 static gint
mpeg_ts_probe_headers(GstTypeFind * tf,guint64 offset,gint packet_size)2476 mpeg_ts_probe_headers (GstTypeFind * tf, guint64 offset, gint packet_size)
2477 {
2478   /* We always enter this function having found at least one header already */
2479   gint found = 1;
2480   const guint8 *data = NULL;
2481 
2482   GST_LOG ("looking for mpeg-ts packets of size %u", packet_size);
2483   while (found < GST_MPEGTS_TYPEFIND_MAX_HEADERS) {
2484     offset += packet_size;
2485 
2486     data = gst_type_find_peek (tf, offset, MPEGTS_HDR_SIZE);
2487     if (data == NULL || !IS_MPEGTS_HEADER (data))
2488       return found;
2489 
2490     found++;
2491     GST_LOG ("mpeg-ts sync #%2d at offset %" G_GUINT64_FORMAT, found, offset);
2492   }
2493 
2494   return found;
2495 }
2496 
2497 /* Try and detect at least 4 packets in at most 10 packets worth of
2498  * data. Need to try several possible packet sizes */
2499 static void
mpeg_ts_type_find(GstTypeFind * tf,gpointer unused)2500 mpeg_ts_type_find (GstTypeFind * tf, gpointer unused)
2501 {
2502   /* TS packet sizes to test: normal, DVHS packet size and
2503    * FEC with 16 or 20 byte codes packet size. */
2504   const gint pack_sizes[] = { 188, 192, 204, 208 };
2505   const guint8 *data = NULL;
2506   guint size = 0;
2507   guint64 skipped = 0;
2508 
2509   while (skipped < GST_MPEGTS_TYPEFIND_SCAN_LENGTH) {
2510     if (size < MPEGTS_HDR_SIZE) {
2511       data = gst_type_find_peek (tf, skipped, GST_MPEGTS_TYPEFIND_SYNC_SIZE);
2512       if (!data)
2513         break;
2514       size = GST_MPEGTS_TYPEFIND_SYNC_SIZE;
2515     }
2516 
2517     /* Have at least MPEGTS_HDR_SIZE bytes at this point */
2518     if (IS_MPEGTS_HEADER (data)) {
2519       gsize p;
2520 
2521       GST_LOG ("possible mpeg-ts sync at offset %" G_GUINT64_FORMAT, skipped);
2522 
2523       for (p = 0; p < G_N_ELEMENTS (pack_sizes); p++) {
2524         gint found;
2525 
2526         /* Probe ahead at size pack_sizes[p] */
2527         found = mpeg_ts_probe_headers (tf, skipped, pack_sizes[p]);
2528         if (found >= GST_MPEGTS_TYPEFIND_MIN_HEADERS) {
2529           gint probability;
2530 
2531           /* found at least 4 headers. 10 headers = MAXIMUM probability.
2532            * Arbitrarily, I assigned 10% probability for each header we
2533            * found, 40% -> 100% */
2534           probability = MIN (10 * found, GST_TYPE_FIND_MAXIMUM);
2535 
2536           gst_type_find_suggest_simple (tf, probability, "video/mpegts",
2537               "systemstream", G_TYPE_BOOLEAN, TRUE,
2538               "packetsize", G_TYPE_INT, pack_sizes[p], NULL);
2539           return;
2540         }
2541       }
2542     }
2543     data++;
2544     skipped++;
2545     size--;
2546   }
2547 }
2548 
2549 #define GST_MPEGVID_TYPEFIND_TRY_PICTURES 6
2550 #define GST_MPEGVID_TYPEFIND_TRY_SYNC (100 * 1024)      /* 100 kB */
2551 
2552 /* Scan ahead a maximum of max_extra_offset bytes until the next IS_MPEG_HEADER
2553  * offset.  After the call, offset will be after the 0x000001, i.e. at the 4th
2554  * byte of the MPEG header.  Returns TRUE if a header was found, FALSE if not.
2555  */
2556 static gboolean
mpeg_find_next_header(GstTypeFind * tf,DataScanCtx * c,guint64 max_extra_offset)2557 mpeg_find_next_header (GstTypeFind * tf, DataScanCtx * c,
2558     guint64 max_extra_offset)
2559 {
2560   guint64 extra_offset;
2561 
2562   for (extra_offset = 0; extra_offset <= max_extra_offset; ++extra_offset) {
2563     if (!data_scan_ctx_ensure_data (tf, c, 4))
2564       return FALSE;
2565     if (IS_MPEG_HEADER (c->data)) {
2566       data_scan_ctx_advance (tf, c, 3);
2567       return TRUE;
2568     }
2569     data_scan_ctx_advance (tf, c, 1);
2570   }
2571   return FALSE;
2572 }
2573 
2574 /*** video/mpeg MPEG-4 elementary video stream ***/
2575 
2576 static GstStaticCaps mpeg4_video_caps = GST_STATIC_CAPS ("video/mpeg, "
2577     "systemstream=(boolean)false, mpegversion=4, parsed=(boolean)false");
2578 #define MPEG4_VIDEO_CAPS gst_static_caps_get(&mpeg4_video_caps)
2579 
2580 /*
2581  * This typefind is based on the elementary video header defined in
2582  * http://xhelmboyx.tripod.com/formats/mpeg-layout.txt
2583  * In addition, it allows the visual object sequence header to be
2584  * absent, and even the VOS header to be absent.  In the latter case,
2585  * a number of VOPs have to be present.
2586  */
2587 static void
mpeg4_video_type_find(GstTypeFind * tf,gpointer unused)2588 mpeg4_video_type_find (GstTypeFind * tf, gpointer unused)
2589 {
2590   DataScanCtx c = { 0, NULL, 0 };
2591   gboolean seen_vios_at_0 = FALSE;
2592   gboolean seen_vios = FALSE;
2593   gboolean seen_vos = FALSE;
2594   gboolean seen_vol = FALSE;
2595   guint num_vop_headers = 0;
2596   guint8 sc;
2597 
2598   while (c.offset < GST_MPEGVID_TYPEFIND_TRY_SYNC) {
2599     if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
2600       break;
2601 
2602     if (!mpeg_find_next_header (tf, &c,
2603             GST_MPEGVID_TYPEFIND_TRY_SYNC - c.offset))
2604       break;
2605 
2606     sc = c.data[0];
2607 
2608     /* visual_object_sequence_start_code */
2609     if (sc == 0xB0) {
2610       if (seen_vios)
2611         break;                  /* Terminate at second vios */
2612       if (c.offset == 0)
2613         seen_vios_at_0 = TRUE;
2614       seen_vios = TRUE;
2615       data_scan_ctx_advance (tf, &c, 2);
2616       if (!mpeg_find_next_header (tf, &c, 0))
2617         break;
2618 
2619       sc = c.data[0];
2620 
2621       /* Optional metadata */
2622       if (sc == 0xB2)
2623         if (!mpeg_find_next_header (tf, &c, 24))
2624           break;
2625     }
2626 
2627     /* visual_object_start_code (consider it optional) */
2628     if (sc == 0xB5) {
2629       data_scan_ctx_advance (tf, &c, 2);
2630       /* may contain ID marker and YUV clamping */
2631       if (!mpeg_find_next_header (tf, &c, 7))
2632         break;
2633 
2634       sc = c.data[0];
2635     }
2636 
2637     /* video_object_start_code */
2638     if (sc <= 0x1F) {
2639       if (seen_vos)
2640         break;                  /* Terminate at second vos */
2641       seen_vos = TRUE;
2642       data_scan_ctx_advance (tf, &c, 2);
2643       continue;
2644     }
2645 
2646     /* video_object_layer_start_code */
2647     if (sc >= 0x20 && sc <= 0x2F) {
2648       seen_vol = TRUE;
2649       data_scan_ctx_advance (tf, &c, 5);
2650       continue;
2651     }
2652 
2653     /* video_object_plane_start_code */
2654     if (sc == 0xB6) {
2655       num_vop_headers++;
2656       data_scan_ctx_advance (tf, &c, 2);
2657       continue;
2658     }
2659 
2660     /* Unknown start code. */
2661   }
2662 
2663   if (num_vop_headers > 0 || seen_vol) {
2664     GstTypeFindProbability probability = 0;
2665 
2666     GST_LOG ("Found %d pictures, vios: %d, vos:%d, vol:%d", num_vop_headers,
2667         seen_vios, seen_vos, seen_vol);
2668 
2669     if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vios_at_0
2670         && seen_vos && seen_vol)
2671       probability = GST_TYPE_FIND_MAXIMUM - 1;
2672     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vios
2673         && seen_vos && seen_vol)
2674       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 1;
2675     else if (seen_vios_at_0 && seen_vos && seen_vol)
2676       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 6;
2677     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vos
2678         && seen_vol)
2679       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 6;
2680     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vol)
2681       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 9;
2682     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
2683       probability = GST_TYPE_FIND_LIKELY - 1;
2684     else if (num_vop_headers > 2 && seen_vios && seen_vos && seen_vol)
2685       probability = GST_TYPE_FIND_LIKELY - 9;
2686     else if (seen_vios && seen_vos && seen_vol)
2687       probability = GST_TYPE_FIND_LIKELY - 20;
2688     else if (num_vop_headers > 0 && seen_vos && seen_vol)
2689       probability = GST_TYPE_FIND_POSSIBLE;
2690     else if (num_vop_headers > 0)
2691       probability = GST_TYPE_FIND_POSSIBLE - 10;
2692     else if (seen_vos && seen_vol)
2693       probability = GST_TYPE_FIND_POSSIBLE - 20;
2694 
2695     gst_type_find_suggest (tf, probability, MPEG4_VIDEO_CAPS);
2696   }
2697 }
2698 
2699 /*** video/x-h263 H263 video stream ***/
2700 static GstStaticCaps h263_video_caps =
2701 GST_STATIC_CAPS ("video/x-h263, variant=(string)itu");
2702 
2703 #define H263_VIDEO_CAPS gst_static_caps_get(&h263_video_caps)
2704 
2705 #define H263_MAX_PROBE_LENGTH (128 * 1024)
2706 
2707 static void
h263_video_type_find(GstTypeFind * tf,gpointer unused)2708 h263_video_type_find (GstTypeFind * tf, gpointer unused)
2709 {
2710   DataScanCtx c = { 0, NULL, 0 };
2711   guint64 data = 0xffff;        /* prevents false positive for first 2 bytes */
2712   guint64 psc = 0;
2713   guint8 ptype = 0;
2714   guint format;
2715   guint good = 0;
2716   guint bad = 0;
2717   guint pc_type, pb_mode;
2718 
2719   while (c.offset < H263_MAX_PROBE_LENGTH) {
2720     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
2721       break;
2722 
2723     /* Find the picture start code */
2724     data = (data << 8) + c.data[0];
2725     psc = data & G_GUINT64_CONSTANT (0xfffffc0000);
2726     if (psc == 0x800000) {
2727       /* Found PSC */
2728       /* PTYPE */
2729       ptype = (data & 0x3fc) >> 2;
2730       /* Source Format */
2731       format = ptype & 0x07;
2732 
2733       /* Now that we have a Valid PSC, check if we also have a valid PTYPE and
2734          the Source Format, which should range between 1 and 5 */
2735       if (((ptype >> 6) == 0x2) && (format > 0 && format < 6)) {
2736         pc_type = data & 0x02;
2737         pb_mode = c.data[1] & 0x20 >> 4;
2738         if (!pc_type && pb_mode)
2739           bad++;
2740         else
2741           good++;
2742       } else
2743         bad++;
2744 
2745       /* FIXME: maybe bail out early if we get mostly bad syncs ? */
2746     }
2747 
2748     data_scan_ctx_advance (tf, &c, 1);
2749   }
2750 
2751   GST_LOG ("good: %d, bad: %d", good, bad);
2752 
2753   if (good > 2 * bad)
2754     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H263_VIDEO_CAPS);
2755 
2756   return;
2757 }
2758 
2759 /*** video/x-h264 H264 elementary video stream ***/
2760 
2761 static GstStaticCaps h264_video_caps =
2762 GST_STATIC_CAPS ("video/x-h264,stream-format=byte-stream");
2763 
2764 #define H264_VIDEO_CAPS gst_static_caps_get(&h264_video_caps)
2765 
2766 #define H264_MAX_PROBE_LENGTH (128 * 1024)      /* 128kB for HD should be enough. */
2767 
2768 static void
h264_video_type_find(GstTypeFind * tf,gpointer unused)2769 h264_video_type_find (GstTypeFind * tf, gpointer unused)
2770 {
2771   DataScanCtx c = { 0, NULL, 0 };
2772 
2773   /* Stream consists of: a series of sync codes (00 00 00 01) followed
2774    * by NALs
2775    */
2776   gboolean seen_idr = FALSE;
2777   gboolean seen_sps = FALSE;
2778   gboolean seen_pps = FALSE;
2779   gboolean seen_ssps = FALSE;
2780   int nut, ref;
2781   int good = 0;
2782   int bad = 0;
2783 
2784   while (c.offset < H264_MAX_PROBE_LENGTH) {
2785     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
2786       break;
2787 
2788     if (IS_MPEG_HEADER (c.data)) {
2789       nut = c.data[3] & 0x9f;   /* forbiden_zero_bit | nal_unit_type */
2790       ref = c.data[3] & 0x60;   /* nal_ref_idc */
2791 
2792       /* if forbidden bit is different to 0 won't be h264 */
2793       if (nut > 0x1f) {
2794         bad++;
2795         break;
2796       }
2797 
2798       /* collect statistics about the NAL types */
2799       if ((nut >= 1 && nut <= 13) || nut == 19) {
2800         if ((nut == 5 && ref == 0) ||
2801             ((nut == 6 || (nut >= 9 && nut <= 12)) && ref != 0)) {
2802           bad++;
2803         } else {
2804           if (nut == 7)
2805             seen_sps = TRUE;
2806           else if (nut == 8)
2807             seen_pps = TRUE;
2808           else if (nut == 5)
2809             seen_idr = TRUE;
2810 
2811           good++;
2812         }
2813       } else if (nut >= 14 && nut <= 33) {
2814         if (nut == 15) {
2815           seen_ssps = TRUE;
2816           good++;
2817         } else if (nut == 14 || nut == 20) {
2818           /* Sometimes we see NAL 14 or 20 without SSPS
2819            * if dropped into the middle of a stream -
2820            * just ignore those (don't add to bad count) */
2821           if (seen_ssps)
2822             good++;
2823         } else {
2824           /* reserved */
2825           /* Theoretically these are good, since if they exist in the
2826              stream it merely means that a newer backwards-compatible
2827              h.264 stream.  But we should be identifying that separately. */
2828           bad++;
2829         }
2830       } else {
2831         /* unspecified, application specific */
2832         /* don't consider these bad */
2833       }
2834 
2835       GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps:%d", good, bad,
2836           seen_pps, seen_sps, seen_idr, seen_ssps);
2837 
2838       if (seen_sps && seen_pps && seen_idr && good >= 10 && bad < 4) {
2839         gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, H264_VIDEO_CAPS);
2840         return;
2841       }
2842 
2843       data_scan_ctx_advance (tf, &c, 4);
2844     }
2845     data_scan_ctx_advance (tf, &c, 1);
2846   }
2847 
2848   GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps=%d", good, bad,
2849       seen_pps, seen_sps, seen_idr, seen_ssps);
2850 
2851   if (good >= 2 && bad == 0) {
2852     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H264_VIDEO_CAPS);
2853   }
2854 }
2855 
2856 /*** video/x-h265 H265 elementary video stream ***/
2857 
2858 static GstStaticCaps h265_video_caps =
2859 GST_STATIC_CAPS ("video/x-h265,stream-format=byte-stream");
2860 
2861 #define H265_VIDEO_CAPS gst_static_caps_get(&h265_video_caps)
2862 
2863 #define H265_MAX_PROBE_LENGTH (128 * 1024)      /* 128kB for HD should be enough. */
2864 
2865 static void
h265_video_type_find(GstTypeFind * tf,gpointer unused)2866 h265_video_type_find (GstTypeFind * tf, gpointer unused)
2867 {
2868   DataScanCtx c = { 0, NULL, 0 };
2869 
2870   /* Stream consists of: a series of sync codes (00 00 00 01) followed
2871    * by NALs
2872    */
2873   gboolean seen_irap = FALSE;
2874   gboolean seen_vps = FALSE;
2875   gboolean seen_sps = FALSE;
2876   gboolean seen_pps = FALSE;
2877   int nut;
2878   int good = 0;
2879   int bad = 0;
2880 
2881   while (c.offset < H265_MAX_PROBE_LENGTH) {
2882     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 5)))
2883       break;
2884 
2885     if (IS_MPEG_HEADER (c.data)) {
2886       /* forbiden_zero_bit | nal_unit_type */
2887       nut = c.data[3] & 0xfe;
2888 
2889       /* if forbidden bit is different to 0 won't be h265 */
2890       if (nut > 0x7e) {
2891         bad++;
2892         break;
2893       }
2894       nut = nut >> 1;
2895 
2896       /* if nuh_layer_id is not zero or nuh_temporal_id_plus1 is zero then
2897        * it won't be h265 */
2898       if ((c.data[3] & 0x01) || (c.data[4] & 0xf8) || !(c.data[4] & 0x07)) {
2899         bad++;
2900         break;
2901       }
2902 
2903       /* collect statistics about the NAL types */
2904       if ((nut >= 0 && nut <= 9) || (nut >= 16 && nut <= 21) || (nut >= 32
2905               && nut <= 40)) {
2906         if (nut == 32)
2907           seen_vps = TRUE;
2908         else if (nut == 33)
2909           seen_sps = TRUE;
2910         else if (nut == 34)
2911           seen_pps = TRUE;
2912         else if (nut >= 16 && nut <= 21) {
2913           /* BLA, IDR and CRA pictures are belongs to be IRAP picture */
2914           /* we are not counting the reserved IRAP pictures (22 and 23) to good */
2915           seen_irap = TRUE;
2916         }
2917 
2918         good++;
2919       } else if ((nut >= 10 && nut <= 15) || (nut >= 22 && nut <= 31)
2920           || (nut >= 41 && nut <= 47)) {
2921         /* reserved values are counting as bad */
2922         bad++;
2923       } else {
2924         /* unspecified (48..63), application specific */
2925         /* don't consider these as bad */
2926       }
2927 
2928       GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, vps:%d, irap:%d", good, bad,
2929           seen_pps, seen_sps, seen_vps, seen_irap);
2930 
2931       if (seen_sps && seen_pps && seen_irap && good >= 10 && bad < 4) {
2932         gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, H265_VIDEO_CAPS);
2933         return;
2934       }
2935 
2936       data_scan_ctx_advance (tf, &c, 5);
2937     }
2938     data_scan_ctx_advance (tf, &c, 1);
2939   }
2940 
2941   GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, vps:%d, irap:%d", good, bad,
2942       seen_pps, seen_sps, seen_vps, seen_irap);
2943 
2944   if (good >= 2 && bad == 0) {
2945     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H265_VIDEO_CAPS);
2946   }
2947 }
2948 
2949 /*** video/mpeg video stream ***/
2950 
2951 static GstStaticCaps mpeg_video_caps = GST_STATIC_CAPS ("video/mpeg, "
2952     "systemstream = (boolean) false");
2953 #define MPEG_VIDEO_CAPS gst_static_caps_get(&mpeg_video_caps)
2954 
2955 /*
2956  * Idea is the same as MPEG system stream typefinding: We check each
2957  * byte of the stream to see if - from that point on - the stream
2958  * matches a predefined set of marker bits as defined in the MPEG
2959  * video specs.
2960  *
2961  * I'm sure someone will do a chance calculation here too.
2962  */
2963 
2964 static void
mpeg_video_stream_type_find(GstTypeFind * tf,gpointer unused)2965 mpeg_video_stream_type_find (GstTypeFind * tf, gpointer unused)
2966 {
2967   DataScanCtx c = { 0, NULL, 0 };
2968   gboolean seen_seq_at_0 = FALSE;
2969   gboolean seen_seq = FALSE;
2970   gboolean seen_gop = FALSE;
2971   guint64 last_pic_offset = 0;
2972   gint num_pic_headers = 0;
2973   gint found = 0;
2974 
2975   while (c.offset < GST_MPEGVID_TYPEFIND_TRY_SYNC) {
2976     if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
2977       break;
2978 
2979     if (!data_scan_ctx_ensure_data (tf, &c, 5))
2980       break;
2981 
2982     if (!IS_MPEG_HEADER (c.data))
2983       goto next;
2984 
2985     /* a pack header indicates that this isn't an elementary stream */
2986     if (c.data[3] == 0xBA && mpeg_sys_is_valid_pack (tf, c.data, c.size, NULL))
2987       return;
2988 
2989     /* do we have a sequence header? */
2990     if (c.data[3] == 0xB3) {
2991       seen_seq_at_0 = seen_seq_at_0 || (c.offset == 0);
2992       seen_seq = TRUE;
2993       data_scan_ctx_advance (tf, &c, 4 + 8);
2994       continue;
2995     }
2996 
2997     /* or a GOP header */
2998     if (c.data[3] == 0xB8) {
2999       seen_gop = TRUE;
3000       data_scan_ctx_advance (tf, &c, 8);
3001       continue;
3002     }
3003 
3004     /* but what we'd really like to see is a picture header */
3005     if (c.data[3] == 0x00) {
3006       ++num_pic_headers;
3007       last_pic_offset = c.offset;
3008       data_scan_ctx_advance (tf, &c, 8);
3009       continue;
3010     }
3011 
3012     /* ... each followed by a slice header with slice_vertical_pos=1 that's
3013      * not too far away from the previously seen picture header. */
3014     if (c.data[3] == 0x01 && num_pic_headers > found &&
3015         (c.offset - last_pic_offset) >= 4 &&
3016         (c.offset - last_pic_offset) <= 64) {
3017       data_scan_ctx_advance (tf, &c, 4);
3018       found += 1;
3019       continue;
3020     }
3021 
3022   next:
3023 
3024     data_scan_ctx_advance (tf, &c, 1);
3025   }
3026 
3027   if (found > 0 || seen_seq) {
3028     GstTypeFindProbability probability = 0;
3029 
3030     GST_LOG ("Found %d pictures, seq:%d, gop:%d", found, seen_seq, seen_gop);
3031 
3032     if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_seq && seen_gop)
3033       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 1;
3034     else if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_seq)
3035       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 9;
3036     else if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
3037       probability = GST_TYPE_FIND_LIKELY;
3038     else if (seen_seq_at_0 && seen_gop && found > 2)
3039       probability = GST_TYPE_FIND_LIKELY - 10;
3040     else if (seen_seq && seen_gop && found > 2)
3041       probability = GST_TYPE_FIND_LIKELY - 20;
3042     else if (seen_seq_at_0 && found > 0)
3043       probability = GST_TYPE_FIND_POSSIBLE;
3044     else if (seen_seq && found > 0)
3045       probability = GST_TYPE_FIND_POSSIBLE - 5;
3046     else if (found > 0)
3047       probability = GST_TYPE_FIND_POSSIBLE - 10;
3048     else if (seen_seq)
3049       probability = GST_TYPE_FIND_POSSIBLE - 20;
3050 
3051     gst_type_find_suggest_simple (tf, probability, "video/mpeg",
3052         "systemstream", G_TYPE_BOOLEAN, FALSE,
3053         "mpegversion", G_TYPE_INT, 1, "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
3054   }
3055 }
3056 
3057 /*** audio/x-aiff ***/
3058 
3059 static GstStaticCaps aiff_caps = GST_STATIC_CAPS ("audio/x-aiff");
3060 
3061 #define AIFF_CAPS gst_static_caps_get(&aiff_caps)
3062 static void
aiff_type_find(GstTypeFind * tf,gpointer unused)3063 aiff_type_find (GstTypeFind * tf, gpointer unused)
3064 {
3065   const guint8 *data = gst_type_find_peek (tf, 0, 16);
3066 
3067   if (data && memcmp (data, "FORM", 4) == 0) {
3068     data += 8;
3069     if (memcmp (data, "AIFF", 4) == 0 || memcmp (data, "AIFC", 4) == 0)
3070       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, AIFF_CAPS);
3071   }
3072 }
3073 
3074 /*** audio/x-svx ***/
3075 
3076 static GstStaticCaps svx_caps = GST_STATIC_CAPS ("audio/x-svx");
3077 
3078 #define SVX_CAPS gst_static_caps_get(&svx_caps)
3079 static void
svx_type_find(GstTypeFind * tf,gpointer unused)3080 svx_type_find (GstTypeFind * tf, gpointer unused)
3081 {
3082   const guint8 *data = gst_type_find_peek (tf, 0, 16);
3083 
3084   if (data && memcmp (data, "FORM", 4) == 0) {
3085     data += 8;
3086     if (memcmp (data, "8SVX", 4) == 0 || memcmp (data, "16SV", 4) == 0)
3087       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SVX_CAPS);
3088   }
3089 }
3090 
3091 /*** audio/x-shorten ***/
3092 
3093 static GstStaticCaps shn_caps = GST_STATIC_CAPS ("audio/x-shorten");
3094 
3095 #define SHN_CAPS gst_static_caps_get(&shn_caps)
3096 static void
shn_type_find(GstTypeFind * tf,gpointer unused)3097 shn_type_find (GstTypeFind * tf, gpointer unused)
3098 {
3099   const guint8 *data = gst_type_find_peek (tf, 0, 4);
3100 
3101   if (data && memcmp (data, "ajkg", 4) == 0) {
3102     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SHN_CAPS);
3103   }
3104   data = gst_type_find_peek (tf, -8, 8);
3105   if (data && memcmp (data, "SHNAMPSK", 8) == 0) {
3106     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SHN_CAPS);
3107   }
3108 }
3109 
3110 /*** application/x-ape ***/
3111 
3112 static GstStaticCaps ape_caps = GST_STATIC_CAPS ("application/x-ape");
3113 
3114 #define APE_CAPS gst_static_caps_get(&ape_caps)
3115 static void
ape_type_find(GstTypeFind * tf,gpointer unused)3116 ape_type_find (GstTypeFind * tf, gpointer unused)
3117 {
3118   const guint8 *data = gst_type_find_peek (tf, 0, 4);
3119 
3120   if (data && memcmp (data, "MAC ", 4) == 0) {
3121     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY + 10, APE_CAPS);
3122   }
3123 }
3124 
3125 /*** ISO FORMATS ***/
3126 
3127 /*** audio/x-m4a ***/
3128 
3129 static GstStaticCaps m4a_caps = GST_STATIC_CAPS ("audio/x-m4a");
3130 
3131 #define M4A_CAPS (gst_static_caps_get(&m4a_caps))
3132 static void
m4a_type_find(GstTypeFind * tf,gpointer unused)3133 m4a_type_find (GstTypeFind * tf, gpointer unused)
3134 {
3135   const guint8 *data = gst_type_find_peek (tf, 4, 8);
3136 
3137   if (data && (memcmp (data, "ftypM4A ", 8) == 0)) {
3138     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, M4A_CAPS);
3139   }
3140 }
3141 
3142 /*** application/x-3gp ***/
3143 
3144 /* The Q is there because variables can't start with a number. */
3145 static GstStaticCaps q3gp_caps = GST_STATIC_CAPS ("application/x-3gp");
3146 #define Q3GP_CAPS (gst_static_caps_get(&q3gp_caps))
3147 
3148 static const gchar *
q3gp_type_find_get_profile(const guint8 * data)3149 q3gp_type_find_get_profile (const guint8 * data)
3150 {
3151   switch (GST_MAKE_FOURCC (data[0], data[1], data[2], 0)) {
3152     case GST_MAKE_FOURCC ('3', 'g', 'g', 0):
3153       return "general";
3154     case GST_MAKE_FOURCC ('3', 'g', 'p', 0):
3155       return "basic";
3156     case GST_MAKE_FOURCC ('3', 'g', 's', 0):
3157       return "streaming-server";
3158     case GST_MAKE_FOURCC ('3', 'g', 'r', 0):
3159       return "progressive-download";
3160     default:
3161       break;
3162   }
3163   return NULL;
3164 }
3165 
3166 static void
q3gp_type_find(GstTypeFind * tf,gpointer unused)3167 q3gp_type_find (GstTypeFind * tf, gpointer unused)
3168 {
3169   const gchar *profile;
3170   guint32 ftyp_size = 0;
3171   guint32 offset = 0;
3172   const guint8 *data = NULL;
3173 
3174   if ((data = gst_type_find_peek (tf, 0, 12)) == NULL) {
3175     return;
3176   }
3177 
3178   data += 4;
3179   if (memcmp (data, "ftyp", 4) != 0) {
3180     return;
3181   }
3182 
3183   /* check major brand */
3184   data += 4;
3185   if ((profile = q3gp_type_find_get_profile (data))) {
3186     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
3187         "application/x-3gp", "profile", G_TYPE_STRING, profile, NULL);
3188     return;
3189   }
3190 
3191   /* check compatible brands */
3192   if ((data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3193     ftyp_size = GST_READ_UINT32_BE (data);
3194   }
3195   if ((data = gst_type_find_peek (tf, 0, ftyp_size)) != NULL) {
3196     for (offset = 16; offset + 4 < ftyp_size; offset += 4) {
3197       if ((profile = q3gp_type_find_get_profile (data + offset))) {
3198         gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
3199             "application/x-3gp", "profile", G_TYPE_STRING, profile, NULL);
3200         return;
3201       }
3202     }
3203   }
3204 
3205   return;
3206 
3207 }
3208 
3209 /*** video/mj2 and image/jp2 ***/
3210 static GstStaticCaps mj2_caps = GST_STATIC_CAPS ("video/mj2");
3211 
3212 #define MJ2_CAPS gst_static_caps_get(&mj2_caps)
3213 
3214 static GstStaticCaps jp2_caps = GST_STATIC_CAPS ("image/jp2");
3215 
3216 #define JP2_CAPS gst_static_caps_get(&jp2_caps)
3217 
3218 static void
jp2_type_find(GstTypeFind * tf,gpointer unused)3219 jp2_type_find (GstTypeFind * tf, gpointer unused)
3220 {
3221   const guint8 *data;
3222 
3223   data = gst_type_find_peek (tf, 0, 24);
3224   if (!data)
3225     return;
3226 
3227   /* jp2 signature */
3228   if (memcmp (data, "\000\000\000\014jP  \015\012\207\012", 12) != 0)
3229     return;
3230 
3231   /* check ftyp box */
3232   data += 12;
3233   if (memcmp (data + 4, "ftyp", 4) == 0) {
3234     if (memcmp (data + 8, "jp2 ", 4) == 0)
3235       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, JP2_CAPS);
3236     else if (memcmp (data + 8, "mjp2", 4) == 0)
3237       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MJ2_CAPS);
3238   }
3239 }
3240 
3241 
3242 static GstStaticCaps jpc_caps = GST_STATIC_CAPS ("image/x-jpc");
3243 
3244 #define JPC_CAPS gst_static_caps_get(&jpc_caps)
3245 
3246 static void
jpc_type_find(GstTypeFind * tf,gpointer unused)3247 jpc_type_find (GstTypeFind * tf, gpointer unused)
3248 {
3249   gboolean found_cod = FALSE;
3250   gboolean found_qcd = FALSE;
3251   gboolean found_sot = FALSE;
3252   const guint8 *data;
3253   gint offset = 0;
3254   const guint8 soc_siz[] = { 0xff, 0x4f, 0xff, 0x51 };
3255 
3256 #define GST_TYPE_FIND_JPC_MARKER_SOT  0xFF90
3257 #define GST_TYPE_FIND_JPC_MARKER_COD  0xFF52
3258 #define GST_TYPE_FIND_JPC_MARKER_QCD  0xFF5C
3259 #define GST_TYPE_FIND_JPC_MARKER_COC  0xFF53
3260 #define GST_TYPE_FIND_JPC_MARKER_RGN  0xFF5E
3261 #define GST_TYPE_FIND_JPC_MARKER_QCC  0xFF5D
3262 #define GST_TYPE_FIND_JPC_MARKER_POC  0xFF5F
3263 #define GST_TYPE_FIND_JPC_MARKER_PLM  0xFF57
3264 #define GST_TYPE_FIND_JPC_MARKER_PPM  0xFF60
3265 #define GST_TYPE_FIND_JPC_MARKER_TLM  0xFF55
3266 #define GST_TYPE_FIND_JPC_MARKER_CRG  0xFF63
3267 #define GST_TYPE_FIND_JPC_MARKER_COM  0xFF64
3268 #define GST_TYPE_FIND_JPC_MARKER_CBD  0xFF78
3269 #define GST_TYPE_FIND_JPC_MARKER_MCC  0xFF75
3270 #define GST_TYPE_FIND_JPC_MARKER_MCT  0xFF74
3271 #define GST_TYPE_FIND_JPC_MARKER_MCO  0xFF77
3272 
3273 
3274   /* SOC marker + SIZ marker */
3275   if ((data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3276     if (memcmp (data, soc_siz, 4) != 0)
3277       return;
3278     offset += 4;
3279   } else {
3280     return;
3281   }
3282 
3283   while (!found_sot) {
3284 
3285     /* skip actual marker data */
3286     if ((data = gst_type_find_peek (tf, offset, 2)) != NULL) {
3287       offset += GST_READ_UINT16_BE (data);
3288     } else {
3289       return;
3290     }
3291 
3292     /* read marker */
3293     if ((data = gst_type_find_peek (tf, offset, 2)) != NULL) {
3294       guint16 marker = GST_READ_UINT16_BE (data);
3295       switch (marker) {
3296         case GST_TYPE_FIND_JPC_MARKER_SOT:
3297           found_sot = TRUE;
3298           break;
3299         case GST_TYPE_FIND_JPC_MARKER_COD:
3300           found_cod = TRUE;
3301           break;
3302         case GST_TYPE_FIND_JPC_MARKER_QCD:
3303           found_qcd = TRUE;
3304           break;
3305           /* optional header markers */
3306         case GST_TYPE_FIND_JPC_MARKER_COC:
3307         case GST_TYPE_FIND_JPC_MARKER_RGN:
3308         case GST_TYPE_FIND_JPC_MARKER_QCC:
3309         case GST_TYPE_FIND_JPC_MARKER_POC:
3310         case GST_TYPE_FIND_JPC_MARKER_PLM:
3311         case GST_TYPE_FIND_JPC_MARKER_PPM:
3312         case GST_TYPE_FIND_JPC_MARKER_TLM:
3313         case GST_TYPE_FIND_JPC_MARKER_CRG:
3314         case GST_TYPE_FIND_JPC_MARKER_COM:
3315         case GST_TYPE_FIND_JPC_MARKER_CBD:
3316         case GST_TYPE_FIND_JPC_MARKER_MCC:
3317         case GST_TYPE_FIND_JPC_MARKER_MCT:
3318         case GST_TYPE_FIND_JPC_MARKER_MCO:
3319           break;
3320           /* unrecognized marker */
3321         default:
3322           return;
3323       }
3324       offset += 2;
3325     } else {
3326       return;
3327     }
3328   }
3329 
3330   if (found_cod && found_qcd && found_sot)
3331     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, JPC_CAPS);
3332 }
3333 
3334 
3335 /*** video/quicktime ***/
3336 
3337 static GstStaticCaps qt_caps = GST_STATIC_CAPS ("video/quicktime");
3338 
3339 #define QT_CAPS gst_static_caps_get(&qt_caps)
3340 #define STRNCMP(x,y,z) (strncmp ((char*)(x), (char*)(y), z))
3341 
3342 /* FIXME 0.11: go through http://www.ftyps.com/ */
3343 static void
qt_type_find(GstTypeFind * tf,gpointer unused)3344 qt_type_find (GstTypeFind * tf, gpointer unused)
3345 {
3346   const guint8 *data;
3347   guint tip = 0;
3348   guint64 offset = 0;
3349   guint64 size;
3350   const gchar *variant = NULL;
3351 
3352   while ((data = gst_type_find_peek (tf, offset, 12)) != NULL) {
3353     guint64 new_offset;
3354 
3355     if (STRNCMP (&data[4], "ftypqt  ", 8) == 0) {
3356       tip = GST_TYPE_FIND_MAXIMUM;
3357       break;
3358     }
3359 
3360     if (STRNCMP (&data[4], "ftypisom", 8) == 0 ||
3361         STRNCMP (&data[4], "ftypavc1", 8) == 0 ||
3362         STRNCMP (&data[4], "ftypmp42", 8) == 0) {
3363       tip = GST_TYPE_FIND_MAXIMUM;
3364       variant = "iso";
3365       break;
3366     }
3367 
3368     if (STRNCMP (&data[4], "ftypisml", 8) == 0 ||
3369         STRNCMP (&data[4], "ftypavc3", 8) == 0) {
3370       tip = GST_TYPE_FIND_MAXIMUM;
3371       variant = "iso-fragmented";
3372       break;
3373     }
3374 
3375     if (STRNCMP (&data[4], "ftypccff", 8) == 0) {
3376       tip = GST_TYPE_FIND_MAXIMUM;
3377       variant = "ccff";
3378       break;
3379     }
3380 
3381     /* box/atom types that are in common with ISO base media file format */
3382     if (STRNCMP (&data[4], "moov", 4) == 0 ||
3383         STRNCMP (&data[4], "mdat", 4) == 0 ||
3384         STRNCMP (&data[4], "ftyp", 4) == 0 ||
3385         STRNCMP (&data[4], "free", 4) == 0 ||
3386         STRNCMP (&data[4], "uuid", 4) == 0 ||
3387         STRNCMP (&data[4], "moof", 4) == 0 ||
3388         STRNCMP (&data[4], "skip", 4) == 0) {
3389       if (tip == 0) {
3390         tip = GST_TYPE_FIND_LIKELY;
3391       } else {
3392         tip = GST_TYPE_FIND_NEARLY_CERTAIN;
3393       }
3394     }
3395     /* other box/atom types, apparently quicktime specific */
3396     else if (STRNCMP (&data[4], "pnot", 4) == 0 ||
3397         STRNCMP (&data[4], "PICT", 4) == 0 ||
3398         STRNCMP (&data[4], "wide", 4) == 0 ||
3399         STRNCMP (&data[4], "prfl", 4) == 0) {
3400       tip = GST_TYPE_FIND_MAXIMUM;
3401       break;
3402     } else {
3403       tip = 0;
3404       break;
3405     }
3406 
3407     size = GST_READ_UINT32_BE (data);
3408     if (size + offset >= G_MAXINT64)
3409       break;
3410     /* check compatible brands rather than ever expaning major brands above */
3411     if ((STRNCMP (&data[4], "ftyp", 4) == 0) && (size >= 16)) {
3412       data = gst_type_find_peek (tf, offset, size);
3413       if (data == NULL)
3414         goto done;
3415       new_offset = 12;
3416       while (new_offset + 4 <= size) {
3417         if (STRNCMP (&data[new_offset], "isom", 4) == 0 ||
3418             STRNCMP (&data[new_offset], "dash", 4) == 0 ||
3419             STRNCMP (&data[new_offset], "avc1", 4) == 0 ||
3420             STRNCMP (&data[new_offset], "avc3", 4) == 0 ||
3421             STRNCMP (&data[new_offset], "mp41", 4) == 0 ||
3422             STRNCMP (&data[new_offset], "mp42", 4) == 0) {
3423           tip = GST_TYPE_FIND_MAXIMUM;
3424           variant = "iso";
3425           goto done;
3426         }
3427         new_offset += 4;
3428       }
3429     }
3430     if (size == 1) {
3431       const guint8 *sizedata;
3432 
3433       sizedata = gst_type_find_peek (tf, offset + 8, 8);
3434       if (sizedata == NULL)
3435         break;
3436 
3437       size = GST_READ_UINT64_BE (sizedata);
3438     } else {
3439       if (size < 8)
3440         break;
3441     }
3442     new_offset = offset + size;
3443     if (new_offset <= offset)
3444       break;
3445     if (new_offset + 16 >= G_MAXINT64)
3446       break;
3447     offset = new_offset;
3448   }
3449 
3450 done:
3451   if (tip > 0) {
3452     if (variant) {
3453       GstCaps *caps = gst_caps_copy (QT_CAPS);
3454 
3455       gst_caps_set_simple (caps, "variant", G_TYPE_STRING, variant, NULL);
3456       gst_type_find_suggest (tf, tip, caps);
3457       gst_caps_unref (caps);
3458     } else {
3459       gst_type_find_suggest (tf, tip, QT_CAPS);
3460     }
3461   }
3462 };
3463 
3464 
3465 /*** image/x-quicktime ***/
3466 
3467 static GstStaticCaps qtif_caps = GST_STATIC_CAPS ("image/x-quicktime");
3468 
3469 #define QTIF_CAPS gst_static_caps_get(&qtif_caps)
3470 
3471 /* how many atoms we check before we give up */
3472 #define QTIF_MAXROUNDS 25
3473 
3474 static void
qtif_type_find(GstTypeFind * tf,gpointer unused)3475 qtif_type_find (GstTypeFind * tf, gpointer unused)
3476 {
3477   const guint8 *data;
3478   gboolean found_idsc = FALSE;
3479   gboolean found_idat = FALSE;
3480   guint64 offset = 0;
3481   guint rounds = 0;
3482 
3483   while ((data = gst_type_find_peek (tf, offset, 8)) != NULL) {
3484     guint64 size;
3485 
3486     size = GST_READ_UINT32_BE (data);
3487     if (size == 1) {
3488       const guint8 *sizedata;
3489 
3490       sizedata = gst_type_find_peek (tf, offset + 8, 8);
3491       if (sizedata == NULL)
3492         break;
3493 
3494       size = GST_READ_UINT64_BE (sizedata);
3495     }
3496     if (size < 8)
3497       break;
3498 
3499     if (STRNCMP (data + 4, "idsc", 4) == 0)
3500       found_idsc = TRUE;
3501     if (STRNCMP (data + 4, "idat", 4) == 0)
3502       found_idat = TRUE;
3503 
3504     if (found_idsc && found_idat) {
3505       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, QTIF_CAPS);
3506       return;
3507     }
3508 
3509     offset += size;
3510     if (offset + 8 >= G_MAXINT64)
3511       break;
3512     if (++rounds > QTIF_MAXROUNDS)
3513       break;
3514   }
3515 
3516   if (found_idsc || found_idat) {
3517     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, QTIF_CAPS);
3518     return;
3519   }
3520 };
3521 
3522 /*** audio/x-mod ***/
3523 
3524 static GstStaticCaps mod_caps = GST_STATIC_CAPS ("audio/x-mod");
3525 
3526 #define MOD_CAPS gst_static_caps_get(&mod_caps)
3527 /* FIXME: M15 CheckType to do */
3528 static void
mod_type_find(GstTypeFind * tf,gpointer unused)3529 mod_type_find (GstTypeFind * tf, gpointer unused)
3530 {
3531   const guint8 *data;
3532   GstTypeFindProbability probability;
3533   const char *mod_type = NULL;
3534 
3535   /* MOD */
3536   if ((data = gst_type_find_peek (tf, 1080, 4)) != NULL) {
3537     /* Protracker and variants */
3538     if ((memcmp (data, "M.K.", 4) == 0) ||
3539         (memcmp (data, "M!K!", 4) == 0) ||
3540         (memcmp (data, "M&K!", 4) == 0) || (memcmp (data, "N.T.", 4) == 0) ||
3541         /* Star Tracker */
3542         (memcmp (data, "FLT", 3) == 0 && isdigit (data[3])) ||
3543         (memcmp (data, "EXO", 3) == 0 && isdigit (data[3])) ||
3544         /* Oktalyzer (Amiga) */
3545         (memcmp (data, "OKTA", 4) == 0) || (memcmp (data, "OCTA", 4) == 0) ||
3546         /* Oktalyser (Atari) */
3547         (memcmp (data, "CD81", 4) == 0) ||
3548         /* Taketracker */
3549         (memcmp (data, "TDZ", 3) == 0 && isdigit (data[3])) ||
3550         /* Fasttracker */
3551         (memcmp (data + 1, "CHN", 3) == 0 && isdigit (data[0])) ||
3552         /* Fasttracker or Taketracker */
3553         (memcmp (data + 2, "CH", 2) == 0 && isdigit (data[0])
3554             && isdigit (data[1])) || (memcmp (data + 2, "CN", 2) == 0
3555             && isdigit (data[0]) && isdigit (data[1]))) {
3556       mod_type = "mod";
3557       probability = GST_TYPE_FIND_MAXIMUM;
3558       goto suggest_audio_mod_caps;
3559     }
3560   }
3561   /* J2B (Jazz Jackrabbit 2) */
3562   if ((data = gst_type_find_peek (tf, 0, 8)) != NULL) {
3563     if ((memcmp (data, "MUSE\xDE\xAD", 4) == 0) &&
3564         ((memcmp (data + 6, "\xBE\xEF", 2) == 0) ||
3565             (memcmp (data + 6, "\xBA\xBE", 2) == 0))) {
3566       mod_type = "j2b";
3567       probability = GST_TYPE_FIND_MAXIMUM;
3568       goto suggest_audio_mod_caps;
3569     }
3570   }
3571   /* AMS (Velvet Studio) */
3572   if ((data = gst_type_find_peek (tf, 0, 7)) != NULL) {
3573     if (memcmp (data, "AMShdr\x1A", 7) == 0) {
3574       mod_type = "velvet-ams";
3575       probability = GST_TYPE_FIND_MAXIMUM;
3576       goto suggest_audio_mod_caps;
3577     }
3578   }
3579   /* AMS (Extreme Tracker) */
3580   if ((data = gst_type_find_peek (tf, 0, 9)) != NULL) {
3581     if ((memcmp (data, "Extreme", 7) == 0) && (data[8] == 1)) {
3582       mod_type = "extreme-ams";
3583       probability = GST_TYPE_FIND_LIKELY;
3584       goto suggest_audio_mod_caps;
3585     }
3586   }
3587   /* ULT (Ultratracker) */
3588   if ((data = gst_type_find_peek (tf, 0, 14)) != NULL) {
3589     if (memcmp (data, "MAS_UTrack_V00", 14) == 0) {
3590       mod_type = "ult";
3591       probability = GST_TYPE_FIND_MAXIMUM;
3592       goto suggest_audio_mod_caps;
3593     }
3594   }
3595   /* DIGI (DigiBooster) */
3596   if ((data = gst_type_find_peek (tf, 0, 20)) != NULL) {
3597     if (memcmp (data, "DIGI Booster module\0", 20) == 0) {
3598       mod_type = "digi";
3599       probability = GST_TYPE_FIND_MAXIMUM;
3600       goto suggest_audio_mod_caps;
3601     }
3602   }
3603   /* PTM (PolyTracker) */
3604   if ((data = gst_type_find_peek (tf, 0x2C, 4)) != NULL) {
3605     if (memcmp (data, "PTMF", 4) == 0) {
3606       mod_type = "ptm";
3607       probability = GST_TYPE_FIND_LIKELY;
3608       goto suggest_audio_mod_caps;
3609     }
3610   }
3611   /* XM */
3612   if ((data = gst_type_find_peek (tf, 0, 38)) != NULL) {
3613     if ((memcmp (data, "Extended Module: ", 17) == 0) && (data[37] == 0x1A)) {
3614       mod_type = "xm";
3615       probability = GST_TYPE_FIND_MAXIMUM;
3616       goto suggest_audio_mod_caps;
3617     }
3618   }
3619   /* OKT */
3620   if (data || (data = gst_type_find_peek (tf, 0, 8)) != NULL) {
3621     if (memcmp (data, "OKTASONG", 8) == 0) {
3622       mod_type = "okt";
3623       probability = GST_TYPE_FIND_MAXIMUM;
3624       goto suggest_audio_mod_caps;
3625     }
3626   }
3627   /* Various formats with a 4-byte magic ID at the beginning of the file */
3628   if (data || (data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3629     /* PSM (Protracker Studio PSM) */
3630     if (memcmp (data, "PSM", 3) == 0) {
3631       unsigned char fbyte = data[3];
3632       if ((fbyte == ' ') || (fbyte == 254)) {
3633         mod_type = "psm";
3634         probability = GST_TYPE_FIND_MAXIMUM;
3635         goto suggest_audio_mod_caps;
3636       }
3637     }
3638     /* 669 */
3639     if ((memcmp (data, "if", 2) == 0) || (memcmp (data, "JN", 2) == 0)) {
3640       mod_type = "669";
3641       probability = GST_TYPE_FIND_LIKELY;
3642       goto suggest_audio_mod_caps;
3643     }
3644     /* AMF */
3645     if ((memcmp (data, "AMF", 3) == 0) && (data[3] > 10) && (data[3] < 14)) {
3646       mod_type = "dsmi-amf";
3647       probability = GST_TYPE_FIND_MAXIMUM;
3648       goto suggest_audio_mod_caps;
3649     }
3650     /* IT */
3651     if (memcmp (data, "IMPM", 4) == 0) {
3652       mod_type = "it";
3653       probability = GST_TYPE_FIND_MAXIMUM;
3654       goto suggest_audio_mod_caps;
3655     }
3656     /* DBM (DigiBooster Pro) */
3657     if (memcmp (data, "DBM0", 4) == 0) {
3658       mod_type = "dbm";
3659       probability = GST_TYPE_FIND_MAXIMUM;
3660       goto suggest_audio_mod_caps;
3661     }
3662     /* MDL (DigiTrakker) */
3663     if (memcmp (data, "DMDL", 4) == 0) {
3664       mod_type = "mdl";
3665       probability = GST_TYPE_FIND_MAXIMUM;
3666       goto suggest_audio_mod_caps;
3667     }
3668     /* MT2 (MadTracker 2.0) */
3669     if (memcmp (data, "MT20", 4) == 0) {
3670       mod_type = "mt2";
3671       probability = GST_TYPE_FIND_MAXIMUM;
3672       goto suggest_audio_mod_caps;
3673     }
3674     /* DMF (X-Tracker) */
3675     if (memcmp (data, "DDMF", 4) == 0) {
3676       mod_type = "dmf";
3677       probability = GST_TYPE_FIND_MAXIMUM;
3678       goto suggest_audio_mod_caps;
3679     }
3680     /* MED */
3681     if ((memcmp (data, "MMD0", 4) == 0) || (memcmp (data, "MMD1", 4) == 0)) {
3682       mod_type = "med";
3683       probability = GST_TYPE_FIND_MAXIMUM;
3684       goto suggest_audio_mod_caps;
3685     }
3686     /* MTM */
3687     if (memcmp (data, "MTM", 3) == 0) {
3688       mod_type = "mtm";
3689       probability = GST_TYPE_FIND_MAXIMUM;
3690       goto suggest_audio_mod_caps;
3691     }
3692     /* DSM */
3693     if (memcmp (data, "RIFF", 4) == 0) {
3694       const guint8 *data2 = gst_type_find_peek (tf, 8, 4);
3695 
3696       if (data2) {
3697         if (memcmp (data2, "DSMF", 4) == 0) {
3698           mod_type = "dsm";
3699           probability = GST_TYPE_FIND_MAXIMUM;
3700           goto suggest_audio_mod_caps;
3701         }
3702       }
3703     }
3704     /* FAR (Farandole) */
3705     if (memcmp (data, "FAR\xFE", 4) == 0) {
3706       mod_type = "far";
3707       probability = GST_TYPE_FIND_MAXIMUM;
3708       goto suggest_audio_mod_caps;
3709     }
3710     /* FAM */
3711     if (memcmp (data, "FAM\xFE", 4) == 0) {
3712       const guint8 *data2 = gst_type_find_peek (tf, 44, 3);
3713 
3714       if (data2) {
3715         if (memcmp (data2, "compare", 3) == 0) {
3716           mod_type = "fam";
3717           probability = GST_TYPE_FIND_MAXIMUM;
3718           goto suggest_audio_mod_caps;
3719         }
3720         /* otherwise do not suggest anything */
3721       } else {
3722         mod_type = "fam";
3723         probability = GST_TYPE_FIND_LIKELY;
3724         goto suggest_audio_mod_caps;
3725       }
3726     }
3727     /* GDM */
3728     if (memcmp (data, "GDM\xFE", 4) == 0) {
3729       const guint8 *data2 = gst_type_find_peek (tf, 71, 4);
3730 
3731       if (data2) {
3732         if (memcmp (data2, "GMFS", 4) == 0) {
3733           mod_type = "gdm";
3734           probability = GST_TYPE_FIND_MAXIMUM;
3735           goto suggest_audio_mod_caps;
3736         }
3737         /* otherwise do not suggest anything */
3738       } else {
3739         mod_type = "gdm";
3740         probability = GST_TYPE_FIND_LIKELY;
3741         goto suggest_audio_mod_caps;
3742       }
3743     }
3744     /* UMX */
3745     if (memcmp (data, "\xC1\x83\x2A\x9E", 4) == 0) {
3746       mod_type = "umx";
3747       probability = GST_TYPE_FIND_POSSIBLE;
3748       goto suggest_audio_mod_caps;
3749     }
3750   }
3751   /* FAR (Farandole) (secondary detection) */
3752   if ((data = gst_type_find_peek (tf, 44, 3)) != NULL) {
3753     if (memcmp (data, "\x0D\x0A\x1A", 3) == 0) {
3754       mod_type = "far";
3755       probability = GST_TYPE_FIND_POSSIBLE;
3756       goto suggest_audio_mod_caps;
3757     }
3758   }
3759   /* IMF */
3760   if ((data = gst_type_find_peek (tf, 60, 4)) != NULL) {
3761     if (memcmp (data, "IM10", 4) == 0) {
3762       mod_type = "imf";
3763       probability = GST_TYPE_FIND_MAXIMUM;
3764       goto suggest_audio_mod_caps;
3765     }
3766   }
3767   /* S3M */
3768   if ((data = gst_type_find_peek (tf, 44, 4)) != NULL) {
3769     if (memcmp (data, "SCRM", 4) == 0) {
3770       mod_type = "s3m";
3771       probability = GST_TYPE_FIND_MAXIMUM;
3772       goto suggest_audio_mod_caps;
3773     }
3774   }
3775   /* STM */
3776   if ((data = gst_type_find_peek (tf, 20, 8)) != NULL) {
3777     if (g_ascii_strncasecmp ((gchar *) data, "!Scream!", 8) == 0 ||
3778         g_ascii_strncasecmp ((gchar *) data, "BMOD2STM", 8) == 0) {
3779       const guint8 *id, *stmtype;
3780 
3781       if ((id = gst_type_find_peek (tf, 28, 1)) == NULL)
3782         return;
3783       if ((stmtype = gst_type_find_peek (tf, 29, 1)) == NULL)
3784         return;
3785       if (*id == 0x1A && *stmtype == 2) {
3786         mod_type = "stm";
3787         probability = GST_TYPE_FIND_MAXIMUM;
3788         goto suggest_audio_mod_caps;
3789       }
3790     }
3791   }
3792   /* AMF */
3793   if ((data = gst_type_find_peek (tf, 0, 19)) != NULL) {
3794     if (memcmp (data, "ASYLUM Music Format", 19) == 0) {
3795       mod_type = "asylum-amf";
3796       probability = GST_TYPE_FIND_MAXIMUM;
3797       goto suggest_audio_mod_caps;
3798     }
3799   }
3800 
3801 suggest_audio_mod_caps:
3802   if (mod_type != NULL) {
3803     GstCaps *caps = gst_caps_new_simple ("audio/x-mod",
3804         "type", G_TYPE_STRING, mod_type, NULL);
3805 
3806     gst_type_find_suggest (tf, probability, caps);
3807     gst_caps_unref (caps);
3808   }
3809 }
3810 
3811 /*** application/x-shockwave-flash ***/
3812 
3813 static GstStaticCaps swf_caps =
3814 GST_STATIC_CAPS ("application/x-shockwave-flash");
3815 #define SWF_CAPS (gst_static_caps_get(&swf_caps))
3816 static void
swf_type_find(GstTypeFind * tf,gpointer unused)3817 swf_type_find (GstTypeFind * tf, gpointer unused)
3818 {
3819   const guint8 *data = gst_type_find_peek (tf, 0, 4);
3820 
3821   if (data && (data[0] == 'F' || data[0] == 'C') &&
3822       data[1] == 'W' && data[2] == 'S') {
3823     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SWF_CAPS);
3824   }
3825 }
3826 
3827 /*** application/vnd.ms-sstr+xml ***/
3828 
3829 static void
mss_manifest_load_utf16(gunichar2 * utf16_ne,const guint8 * utf16_data,gsize data_size,guint data_endianness)3830 mss_manifest_load_utf16 (gunichar2 * utf16_ne, const guint8 * utf16_data,
3831     gsize data_size, guint data_endianness)
3832 {
3833   memcpy (utf16_ne, utf16_data, data_size);
3834   if (data_endianness != G_BYTE_ORDER) {
3835     guint i;
3836 
3837     for (i = 0; i < data_size / 2; ++i)
3838       utf16_ne[i] = GUINT16_SWAP_LE_BE (utf16_ne[i]);
3839   }
3840 }
3841 
3842 static GstStaticCaps mss_manifest_caps =
3843 GST_STATIC_CAPS ("application/vnd.ms-sstr+xml");
3844 #define MSS_MANIFEST_CAPS (gst_static_caps_get(&mss_manifest_caps))
3845 static void
mss_manifest_type_find(GstTypeFind * tf,gpointer unused)3846 mss_manifest_type_find (GstTypeFind * tf, gpointer unused)
3847 {
3848   gunichar2 utf16_ne[512];
3849   const guint8 *data;
3850   guint data_endianness = 0;
3851   glong n_read = 0, size = 0;
3852   guint length;
3853   gchar *utf8;
3854   gboolean utf8_bom_detected = FALSE;
3855 
3856   if (xml_check_first_element (tf, "SmoothStreamingMedia", 20, TRUE)) {
3857     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
3858     return;
3859   }
3860 
3861   length = gst_type_find_get_length (tf);
3862 
3863   /* try detecting the charset */
3864   data = gst_type_find_peek (tf, 0, 3);
3865 
3866   if (data == NULL)
3867     return;
3868 
3869   /* look for a possible BOM */
3870   if (data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
3871     utf8_bom_detected = TRUE;
3872   else if (data[0] == 0xFF && data[1] == 0xFE)
3873     data_endianness = G_LITTLE_ENDIAN;
3874   else if (data[0] == 0xFE && data[1] == 0xFF)
3875     data_endianness = G_BIG_ENDIAN;
3876   else
3877     return;
3878 
3879   /* try a default that should be enough */
3880   if (length == 0)
3881     length = 512;
3882   else if (length < 64)
3883     return;
3884   else                          /* the first few bytes should be enough */
3885     length = MIN (1024, length);
3886 
3887   data = gst_type_find_peek (tf, 0, length);
3888 
3889   if (data == NULL)
3890     return;
3891 
3892   /* skip the BOM */
3893   data += 2;
3894   length -= 2;
3895 
3896   if (utf8_bom_detected) {
3897     /* skip last byte of the BOM */
3898     data++;
3899     length--;
3900 
3901     if (xml_check_first_element_from_data (data, length,
3902             "SmoothStreamingMedia", 20, TRUE))
3903       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
3904   } else {
3905     length = GST_ROUND_DOWN_2 (length);
3906 
3907     /* convert to native endian UTF-16 */
3908     mss_manifest_load_utf16 (utf16_ne, data, length, data_endianness);
3909 
3910     /* and now convert to UTF-8 */
3911     utf8 = g_utf16_to_utf8 (utf16_ne, length / 2, &n_read, &size, NULL);
3912     if (utf8 != NULL && n_read > 0) {
3913       if (xml_check_first_element_from_data ((const guint8 *) utf8, size,
3914               "SmoothStreamingMedia", 20, TRUE))
3915         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
3916     }
3917     g_free (utf8);
3918   }
3919 }
3920 
3921 /*** image/jpeg ***/
3922 
3923 #define JPEG_MARKER_IS_START_OF_FRAME(x) \
3924     ((x)>=0xc0 && (x) <= 0xcf && (x)!=0xc4 && (x)!=0xc8 && (x)!=0xcc)
3925 
3926 static GstStaticCaps jpeg_caps = GST_STATIC_CAPS ("image/jpeg");
3927 
3928 #define JPEG_CAPS (gst_static_caps_get(&jpeg_caps))
3929 static void
jpeg_type_find(GstTypeFind * tf,gpointer unused)3930 jpeg_type_find (GstTypeFind * tf, gpointer unused)
3931 {
3932   GstTypeFindProbability prob = GST_TYPE_FIND_POSSIBLE;
3933   DataScanCtx c = { 0, NULL, 0 };
3934   GstCaps *caps;
3935   guint num_markers;
3936 
3937   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 2)))
3938     return;
3939 
3940   if (c.data[0] != 0xff || c.data[1] != 0xd8)
3941     return;
3942 
3943   num_markers = 1;
3944   data_scan_ctx_advance (tf, &c, 2);
3945 
3946   caps = gst_caps_copy (JPEG_CAPS);
3947 
3948   while (data_scan_ctx_ensure_data (tf, &c, 4) && c.offset < (200 * 1024)) {
3949     guint16 len;
3950     guint8 marker;
3951 
3952     if (c.data[0] != 0xff)
3953       break;
3954 
3955     marker = c.data[1];
3956     if (G_UNLIKELY (marker == 0xff)) {
3957       data_scan_ctx_advance (tf, &c, 1);
3958       continue;
3959     }
3960 
3961     data_scan_ctx_advance (tf, &c, 2);
3962 
3963     /* we assume all markers we'll see before SOF have a payload length; if
3964      * that's not the case we'll just detect a false sync and bail out, but
3965      * still report POSSIBLE probability */
3966     len = GST_READ_UINT16_BE (c.data);
3967 
3968     GST_LOG ("possible JPEG marker 0x%02x (@0x%04x), segment length %u",
3969         marker, (guint) c.offset, len);
3970 
3971     if (!data_scan_ctx_ensure_data (tf, &c, len))
3972       break;
3973 
3974     if (marker == 0xc4 ||       /* DEFINE_HUFFMAN_TABLES          */
3975         marker == 0xcc ||       /* DEFINE_ARITHMETIC_CONDITIONING */
3976         marker == 0xdb ||       /* DEFINE_QUANTIZATION_TABLES     */
3977         marker == 0xdd ||       /* DEFINE_RESTART_INTERVAL        */
3978         marker == 0xfe) {       /* COMMENT                        */
3979       data_scan_ctx_advance (tf, &c, len);
3980       ++num_markers;
3981     } else if (marker == 0xe0 && len >= (2 + 4) &&      /* APP0 */
3982         data_scan_ctx_memcmp (tf, &c, 2, "JFIF", 4)) {
3983       GST_LOG ("found JFIF tag");
3984       prob = GST_TYPE_FIND_MAXIMUM;
3985       data_scan_ctx_advance (tf, &c, len);
3986       ++num_markers;
3987       /* we continue until we find a start of frame marker */
3988     } else if (marker == 0xe1 && len >= (2 + 4) &&      /* APP1 */
3989         data_scan_ctx_memcmp (tf, &c, 2, "Exif", 4)) {
3990       GST_LOG ("found Exif tag");
3991       prob = GST_TYPE_FIND_MAXIMUM;
3992       data_scan_ctx_advance (tf, &c, len);
3993       ++num_markers;
3994       /* we continue until we find a start of frame marker */
3995     } else if (marker >= 0xe0 && marker <= 0xef) {      /* APPn */
3996       data_scan_ctx_advance (tf, &c, len);
3997       ++num_markers;
3998     } else if (JPEG_MARKER_IS_START_OF_FRAME (marker) && len >= (2 + 8)) {
3999       int h, w;
4000 
4001       h = GST_READ_UINT16_BE (c.data + 2 + 1);
4002       w = GST_READ_UINT16_BE (c.data + 2 + 1 + 2);
4003       if (h == 0 || w == 0) {
4004         GST_WARNING ("bad width %u and/or height %u in SOF header", w, h);
4005         break;
4006       }
4007 
4008       GST_LOG ("SOF at offset %" G_GUINT64_FORMAT ", num_markers=%d, "
4009           "WxH=%dx%d", c.offset - 2, num_markers, w, h);
4010 
4011       if (num_markers >= 5 || prob == GST_TYPE_FIND_MAXIMUM)
4012         prob = GST_TYPE_FIND_MAXIMUM;
4013       else
4014         prob = GST_TYPE_FIND_LIKELY;
4015 
4016       gst_caps_set_simple (caps, "width", G_TYPE_INT, w,
4017           "height", G_TYPE_INT, h, "sof-marker", G_TYPE_INT, marker & 0xf,
4018           NULL);
4019 
4020       break;
4021     } else {
4022       GST_WARNING ("bad length or unexpected JPEG marker 0xff 0x%02x", marker);
4023       break;
4024     }
4025   }
4026 
4027   gst_type_find_suggest (tf, prob, caps);
4028   gst_caps_unref (caps);
4029 }
4030 
4031 /*** image/bmp ***/
4032 
4033 static GstStaticCaps bmp_caps = GST_STATIC_CAPS ("image/bmp");
4034 
4035 #define BMP_CAPS (gst_static_caps_get(&bmp_caps))
4036 static void
bmp_type_find(GstTypeFind * tf,gpointer unused)4037 bmp_type_find (GstTypeFind * tf, gpointer unused)
4038 {
4039   DataScanCtx c = { 0, NULL, 0 };
4040   guint32 struct_size, w, h, planes, bpp;
4041 
4042   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 54)))
4043     return;
4044 
4045   if (c.data[0] != 'B' || c.data[1] != 'M')
4046     return;
4047 
4048   /* skip marker + size */
4049   data_scan_ctx_advance (tf, &c, 2 + 4);
4050 
4051   /* reserved, must be 0 */
4052   if (c.data[0] != 0 || c.data[1] != 0 || c.data[2] != 0 || c.data[3] != 0)
4053     return;
4054 
4055   data_scan_ctx_advance (tf, &c, 2 + 2);
4056 
4057   /* offset to start of image data in bytes (check for sanity) */
4058   GST_LOG ("offset=%u", GST_READ_UINT32_LE (c.data));
4059   if (GST_READ_UINT32_LE (c.data) > (10 * 1024 * 1024))
4060     return;
4061 
4062   struct_size = GST_READ_UINT32_LE (c.data + 4);
4063   GST_LOG ("struct_size=%u", struct_size);
4064 
4065   data_scan_ctx_advance (tf, &c, 4 + 4);
4066 
4067   if (struct_size == 0x0C) {
4068     w = GST_READ_UINT16_LE (c.data);
4069     h = GST_READ_UINT16_LE (c.data + 2);
4070     planes = GST_READ_UINT16_LE (c.data + 2 + 2);
4071     bpp = GST_READ_UINT16_LE (c.data + 2 + 2 + 2);
4072   } else if (struct_size == 40 || struct_size == 64 || struct_size == 108
4073       || struct_size == 124 || struct_size == 0xF0) {
4074     w = GST_READ_UINT32_LE (c.data);
4075     h = GST_READ_UINT32_LE (c.data + 4);
4076     planes = GST_READ_UINT16_LE (c.data + 4 + 4);
4077     bpp = GST_READ_UINT16_LE (c.data + 4 + 4 + 2);
4078   } else {
4079     return;
4080   }
4081 
4082   /* image sizes sanity check */
4083   GST_LOG ("w=%u, h=%u, planes=%u, bpp=%u", w, h, planes, bpp);
4084   if (w == 0 || w > 0xfffff || h == 0 || h > 0xfffff || planes != 1 ||
4085       (bpp != 1 && bpp != 4 && bpp != 8 && bpp != 16 && bpp != 24 && bpp != 32))
4086     return;
4087 
4088   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, "image/bmp",
4089       "width", G_TYPE_INT, w, "height", G_TYPE_INT, h, "bpp", G_TYPE_INT, bpp,
4090       NULL);
4091 }
4092 
4093 /*** image/tiff ***/
4094 static GstStaticCaps tiff_caps = GST_STATIC_CAPS ("image/tiff, "
4095     "endianness = (int) { BIG_ENDIAN, LITTLE_ENDIAN }");
4096 #define TIFF_CAPS (gst_static_caps_get(&tiff_caps))
4097 static GstStaticCaps tiff_be_caps = GST_STATIC_CAPS ("image/tiff, "
4098     "endianness = (int) BIG_ENDIAN");
4099 #define TIFF_BE_CAPS (gst_static_caps_get(&tiff_be_caps))
4100 static GstStaticCaps tiff_le_caps = GST_STATIC_CAPS ("image/tiff, "
4101     "endianness = (int) LITTLE_ENDIAN");
4102 #define TIFF_LE_CAPS (gst_static_caps_get(&tiff_le_caps))
4103 static void
tiff_type_find(GstTypeFind * tf,gpointer ununsed)4104 tiff_type_find (GstTypeFind * tf, gpointer ununsed)
4105 {
4106   const guint8 *data = gst_type_find_peek (tf, 0, 8);
4107   guint8 le_header[4] = { 0x49, 0x49, 0x2A, 0x00 };
4108   guint8 be_header[4] = { 0x4D, 0x4D, 0x00, 0x2A };
4109 
4110   if (data) {
4111     if (memcmp (data, le_header, 4) == 0) {
4112       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TIFF_LE_CAPS);
4113     } else if (memcmp (data, be_header, 4) == 0) {
4114       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TIFF_BE_CAPS);
4115     }
4116   }
4117 }
4118 
4119 /*** image/x-exr ***/
4120 static GstStaticCaps exr_caps = GST_STATIC_CAPS ("image/x-exr");
4121 #define EXR_CAPS (gst_static_caps_get(&exr_caps))
4122 static void
exr_type_find(GstTypeFind * tf,gpointer ununsed)4123 exr_type_find (GstTypeFind * tf, gpointer ununsed)
4124 {
4125   const guint8 *data = gst_type_find_peek (tf, 0, 8);
4126 
4127   if (data) {
4128     guint32 flags;
4129 
4130     if (GST_READ_UINT32_LE (data) != 0x01312f76)
4131       return;
4132 
4133     flags = GST_READ_UINT32_LE (data + 4);
4134     if ((flags & 0xff) != 1 && (flags & 0xff) != 2)
4135       return;
4136 
4137     /* If bit 9 is set, bit 11 and 12 must be 0 */
4138     if ((flags & 0x200) && (flags & 0x1800))
4139       return;
4140 
4141     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, EXR_CAPS);
4142   }
4143 }
4144 
4145 
4146 /*** PNM ***/
4147 
4148 static GstStaticCaps pnm_caps = GST_STATIC_CAPS ("image/x-portable-bitmap; "
4149     "image/x-portable-graymap; image/x-portable-pixmap; "
4150     "image/x-portable-anymap");
4151 
4152 #define PNM_CAPS (gst_static_caps_get(&pnm_caps))
4153 
4154 #define IS_PNM_WHITESPACE(c) \
4155     ((c) == ' ' || (c) == '\r' || (c) == '\n' || (c) == 't')
4156 
4157 static void
pnm_type_find(GstTypeFind * tf,gpointer ununsed)4158 pnm_type_find (GstTypeFind * tf, gpointer ununsed)
4159 {
4160   const gchar *media_type = NULL;
4161   DataScanCtx c = { 0, NULL, 0 };
4162   guint h = 0, w = 0;
4163 
4164   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 16)))
4165     return;
4166 
4167   /* see http://en.wikipedia.org/wiki/Netpbm_format */
4168   if (c.data[0] != 'P' || c.data[1] < '1' || c.data[1] > '7' ||
4169       !IS_PNM_WHITESPACE (c.data[2]) ||
4170       (c.data[3] != '#' && c.data[3] < '0' && c.data[3] > '9'))
4171     return;
4172 
4173   switch (c.data[1]) {
4174     case '1':
4175       media_type = "image/x-portable-bitmap";   /* ASCII */
4176       break;
4177     case '2':
4178       media_type = "image/x-portable-graymap";  /* ASCII */
4179       break;
4180     case '3':
4181       media_type = "image/x-portable-pixmap";   /* ASCII */
4182       break;
4183     case '4':
4184       media_type = "image/x-portable-bitmap";   /* Raw */
4185       break;
4186     case '5':
4187       media_type = "image/x-portable-graymap";  /* Raw */
4188       break;
4189     case '6':
4190       media_type = "image/x-portable-pixmap";   /* Raw */
4191       break;
4192     case '7':
4193       media_type = "image/x-portable-anymap";
4194       break;
4195     default:
4196       g_return_if_reached ();
4197   }
4198 
4199   /* try to extract width and height as well */
4200   if (c.data[1] != '7') {
4201     gchar s[64] = { 0, }
4202     , sep1, sep2;
4203 
4204     /* need to skip any comment lines first */
4205     data_scan_ctx_advance (tf, &c, 3);
4206 
4207     if (!data_scan_ctx_ensure_data (tf, &c, 1))
4208       return;
4209 
4210     while (c.data[0] == '#') {  /* we know there's still data left */
4211       data_scan_ctx_advance (tf, &c, 1);
4212       if (!data_scan_ctx_ensure_data (tf, &c, 1))
4213         return;
4214 
4215       while (c.data[0] != '\n' && c.data[0] != '\r') {
4216         data_scan_ctx_advance (tf, &c, 1);
4217         if (!data_scan_ctx_ensure_data (tf, &c, 1))
4218           return;
4219       }
4220       data_scan_ctx_advance (tf, &c, 1);
4221       GST_LOG ("skipped comment line in PNM header");
4222       if (!data_scan_ctx_ensure_data (tf, &c, 1))
4223         return;
4224     }
4225 
4226     if (!data_scan_ctx_ensure_data (tf, &c, 32) &&
4227         !data_scan_ctx_ensure_data (tf, &c, 4)) {
4228       return;
4229     }
4230 
4231     /* need to NUL-terminate data for sscanf */
4232     memcpy (s, c.data, MIN (sizeof (s) - 1, c.size));
4233     if (sscanf (s, "%u%c%u%c", &w, &sep1, &h, &sep2) == 4 &&
4234         IS_PNM_WHITESPACE (sep1) && IS_PNM_WHITESPACE (sep2) &&
4235         w > 0 && w < G_MAXINT && h > 0 && h < G_MAXINT) {
4236       GST_LOG ("extracted PNM width and height: %dx%d", w, h);
4237     } else {
4238       w = 0;
4239       h = 0;
4240     }
4241   } else {
4242     /* FIXME: extract width + height for anymaps too */
4243   }
4244 
4245   if (w > 0 && h > 0) {
4246     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, media_type,
4247         "width", G_TYPE_INT, w, "height", G_TYPE_INT, h, NULL);
4248   } else {
4249     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, media_type, NULL);
4250   }
4251 }
4252 
4253 static GstStaticCaps sds_caps = GST_STATIC_CAPS ("audio/x-sds");
4254 
4255 #define SDS_CAPS (gst_static_caps_get(&sds_caps))
4256 static void
sds_type_find(GstTypeFind * tf,gpointer ununsed)4257 sds_type_find (GstTypeFind * tf, gpointer ununsed)
4258 {
4259   const guint8 *data = gst_type_find_peek (tf, 0, 4);
4260   guint8 mask[4] = { 0xFF, 0xFF, 0x80, 0xFF };
4261   guint8 match[4] = { 0xF0, 0x7E, 0, 0x01 };
4262   gint x;
4263 
4264   if (data) {
4265     for (x = 0; x < 4; x++) {
4266       if ((data[x] & mask[x]) != match[x]) {
4267         return;
4268       }
4269     }
4270     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SDS_CAPS);
4271   }
4272 }
4273 
4274 static GstStaticCaps ircam_caps = GST_STATIC_CAPS ("audio/x-ircam");
4275 
4276 #define IRCAM_CAPS (gst_static_caps_get(&ircam_caps))
4277 static void
ircam_type_find(GstTypeFind * tf,gpointer ununsed)4278 ircam_type_find (GstTypeFind * tf, gpointer ununsed)
4279 {
4280   const guint8 *data = gst_type_find_peek (tf, 0, 4);
4281   guint8 mask[4] = { 0xFF, 0xFF, 0xF8, 0xFF };
4282   guint8 match[4] = { 0x64, 0xA3, 0x00, 0x00 };
4283   gint x;
4284   gboolean matched = TRUE;
4285 
4286   if (!data) {
4287     return;
4288   }
4289   for (x = 0; x < 4; x++) {
4290     if ((data[x] & mask[x]) != match[x]) {
4291       matched = FALSE;
4292     }
4293   }
4294   if (matched) {
4295     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, IRCAM_CAPS);
4296     return;
4297   }
4298   /* now try the reverse version */
4299   matched = TRUE;
4300   for (x = 0; x < 4; x++) {
4301     if ((data[x] & mask[3 - x]) != match[3 - x]) {
4302       matched = FALSE;
4303     }
4304   }
4305 }
4306 
/*** Matroska/WebM ***/

/* Element IDs in the form ebml_read_chunk_header() reports them, i.e. the raw
 * ID bytes with the leading length-marker bit left in place */
#define EBML_HEADER           0x1A45DFA3
#define EBML_VERSION          0x4286
#define EBML_DOCTYPE          0x4282
#define EBML_DOCTYPE_VERSION  0x4287
#define MATROSKA_SEGMENT      0x18538067
#define MATROSKA_CLUSTER      0x1F43B675
#define MATROSKA_TRACKS       0x1654AE6B
#define MATROSKA_TRACK_ENTRY  0xAE
#define MATROSKA_TRACK_TYPE   0x83
#define MATROSKA_STEREO_MODE  0x53B8

/* ebml_parse_chunk() gives up once its scan offset exceeds this many bytes */
#define EBML_MAX_LEN (2 * 1024 * 1024)
4321 
/* Document type as determined from the payload of the EBML_DOCTYPE element */
typedef enum
{
  EBML_DOCTYPE_UNKNOWN = 0,     /* no known doctype string recognised */
  EBML_DOCTYPE_MATROSKA,        /* payload starts with "matroska" */
  EBML_DOCTYPE_WEBM             /* payload starts with "webm" */
} GstEbmlDocType;
4328 
/* Information collected by ebml_parse_chunk() while walking the elements */
typedef struct
{
  GstEbmlDocType doctype;       /* set when an EBML_DOCTYPE element is recognised */
  guint audio;                  /* number of track type elements counted as audio */
  guint video;                  /* number of track type elements counted as video */
  guint other;                  /* number of track type elements of any other type */
  guint video_stereo;           /* number of MATROSKA_STEREO_MODE elements seen */
  guint chunks;                 /* number of elements walked over completely */
  guint tracks_ok;              /* if we've seen and fully parsed the TRACKS element */
} GstMatroskaInfo;
4339 
4340 static inline guint
ebml_read_chunk_header(GstTypeFind * tf,DataScanCtx * c,guint max_size,guint32 * id,guint64 * size)4341 ebml_read_chunk_header (GstTypeFind * tf, DataScanCtx * c, guint max_size,
4342     guint32 * id, guint64 * size)
4343 {
4344   guint64 mask;
4345   guint msbit_set, i, len, id_len;
4346 
4347   if (c->size < 12 || max_size < 1)
4348     return 0;
4349 
4350   /* element ID */
4351   *id = c->data[0];
4352   if ((c->data[0] & 0x80) == 0x80) {
4353     id_len = 1;
4354   } else if ((c->data[0] & 0xC0) == 0x40) {
4355     id_len = 2;
4356   } else if ((c->data[0] & 0xE0) == 0x20) {
4357     id_len = 3;
4358   } else if ((c->data[0] & 0xF0) == 0x10) {
4359     id_len = 4;
4360   } else {
4361     return 0;
4362   }
4363 
4364   if (max_size < id_len)
4365     return 0;
4366 
4367   for (i = 1; i < id_len; ++i) {
4368     *id = (*id << 8) | c->data[i];
4369   }
4370 
4371   data_scan_ctx_advance (tf, c, id_len);
4372   max_size -= id_len;
4373 
4374   /* size */
4375   if (max_size < 1 || c->data[0] == 0)
4376     return 0;
4377 
4378   msbit_set = g_bit_nth_msf (c->data[0], 8);
4379   mask = ((1 << msbit_set) - 1);
4380   *size = c->data[0] & mask;
4381   len = 7 - msbit_set;
4382 
4383   if (max_size < 1 + len)
4384     return 0;
4385   for (i = 0; i < len; ++i) {
4386     mask = (mask << 8) | 0xff;
4387     *size = (*size << 8) | c->data[1 + i];
4388   }
4389 
4390   data_scan_ctx_advance (tf, c, 1 + len);
4391 
4392   /* undefined/unknown size? (all bits 1) */
4393   if (*size == mask) {
4394     /* allow unknown size for SEGMENT chunk, bail out otherwise */
4395     if (*id == MATROSKA_SEGMENT)
4396       *size = G_MAXUINT64;
4397     else
4398       return 0;
4399   }
4400 
4401   return id_len + (1 + len);
4402 }
4403 
4404 static gboolean
ebml_parse_chunk(GstTypeFind * tf,DataScanCtx * ctx,guint32 chunk_id,guint chunk_size,GstMatroskaInfo * info,guint depth)4405 ebml_parse_chunk (GstTypeFind * tf, DataScanCtx * ctx, guint32 chunk_id,
4406     guint chunk_size, GstMatroskaInfo * info, guint depth)
4407 {                               /* FIXME: make sure input size is clipped to 32 bit */
4408   static const gchar SPACES[] = "                ";
4409   DataScanCtx c = *ctx;
4410   guint64 element_size = 0;
4411   guint32 id, hdr_len;
4412 
4413   if (depth >= 8)               /* keep SPACES large enough for depth */
4414     return FALSE;
4415 
4416   while (chunk_size > 0) {
4417     if (c.offset > EBML_MAX_LEN || !data_scan_ctx_ensure_data (tf, &c, 64))
4418       return FALSE;
4419 
4420     hdr_len = ebml_read_chunk_header (tf, &c, chunk_size, &id, &element_size);
4421     if (hdr_len == 0)
4422       return FALSE;
4423 
4424     g_assert (hdr_len <= chunk_size);
4425     chunk_size -= hdr_len;
4426 
4427     if (element_size > chunk_size)
4428       return FALSE;
4429 
4430     GST_DEBUG ("%s %08x, size %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT,
4431         SPACES + sizeof (SPACES) - 1 - (2 * depth), id, element_size,
4432         hdr_len + element_size);
4433 
4434     if (element_size >= G_MAXUINT32) {
4435       GST_DEBUG ("Chunk too big for typefinding");
4436       return FALSE;
4437     }
4438 
4439     if (!data_scan_ctx_ensure_data (tf, &c, element_size)) {
4440       GST_DEBUG ("not enough data");
4441       return FALSE;
4442     }
4443 
4444     switch (id) {
4445       case EBML_DOCTYPE:
4446         if (element_size >= 8 && memcmp (c.data, "matroska", 8) == 0)
4447           info->doctype = EBML_DOCTYPE_MATROSKA;
4448         else if (element_size >= 4 && memcmp (c.data, "webm", 4) == 0)
4449           info->doctype = EBML_DOCTYPE_WEBM;
4450         break;
4451       case MATROSKA_SEGMENT:
4452         GST_LOG ("parsing segment");
4453         ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1);
4454         GST_LOG ("parsed segment, done");
4455         return FALSE;
4456       case MATROSKA_TRACKS:
4457         GST_LOG ("parsing tracks");
4458         info->tracks_ok =
4459             ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1);
4460         GST_LOG ("parsed tracks: %s, done (after %" G_GUINT64_FORMAT " bytes)",
4461             info->tracks_ok ? "ok" : "FAIL", c.offset + element_size);
4462         return FALSE;
4463       case MATROSKA_TRACK_ENTRY:
4464         GST_LOG ("parsing track entry");
4465         if (!ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1))
4466           return FALSE;
4467         break;
4468       case MATROSKA_TRACK_TYPE:{
4469         guint type = 0, i;
4470 
4471         /* is supposed to always be 1-byte, but not everyone's following that */
4472         for (i = 0; i < element_size; ++i)
4473           type = (type << 8) | c.data[i];
4474 
4475         GST_DEBUG ("%s   track type %u",
4476             SPACES + sizeof (SPACES) - 1 - (2 * depth), type);
4477 
4478         if (type == 1)
4479           ++info->video;
4480         else if (c.data[0] == 2)
4481           ++info->audio;
4482         else
4483           ++info->other;
4484         break;
4485       }
4486       case MATROSKA_STEREO_MODE:
4487         ++info->video_stereo;
4488         break;
4489       case MATROSKA_CLUSTER:
4490         GST_WARNING ("cluster, bailing out (should've found tracks by now)");
4491         return FALSE;
4492       default:
4493         break;
4494     }
4495     data_scan_ctx_advance (tf, &c, element_size);
4496     chunk_size -= element_size;
4497     ++info->chunks;
4498   }
4499 
4500   return TRUE;
4501 }
4502 
4503 static GstStaticCaps matroska_caps = GST_STATIC_CAPS ("video/x-matroska");
4504 
4505 #define MATROSKA_CAPS (gst_static_caps_get(&matroska_caps))
4506 static void
matroska_type_find(GstTypeFind * tf,gpointer ununsed)4507 matroska_type_find (GstTypeFind * tf, gpointer ununsed)
4508 {
4509   GstTypeFindProbability prob;
4510   GstMatroskaInfo info = { 0, };
4511   const gchar *type_name;
4512   DataScanCtx c = { 0, NULL, 0 };
4513   gboolean is_audio;
4514   guint64 size;
4515   guint32 id, hdr_len;
4516 
4517   if (!data_scan_ctx_ensure_data (tf, &c, 64))
4518     return;
4519 
4520   if (GST_READ_UINT32_BE (c.data) != EBML_HEADER)
4521     return;
4522 
4523   while (c.offset < EBML_MAX_LEN && data_scan_ctx_ensure_data (tf, &c, 64)) {
4524     hdr_len = ebml_read_chunk_header (tf, &c, c.size, &id, &size);
4525     if (hdr_len == 0)
4526       return;
4527 
4528     GST_INFO ("=== top-level chunk %08x, size %" G_GUINT64_FORMAT
4529         " / %" G_GUINT64_FORMAT, id, size, size + hdr_len);
4530 
4531     if (!ebml_parse_chunk (tf, &c, id, size, &info, 0))
4532       break;
4533     data_scan_ctx_advance (tf, &c, size);
4534     GST_INFO ("=== done with chunk %08x", id);
4535     if (id == MATROSKA_SEGMENT)
4536       break;
4537   }
4538 
4539   GST_INFO ("audio=%u video=%u other=%u chunks=%u doctype=%d all_tracks=%d",
4540       info.audio, info.video, info.other, info.chunks, info.doctype,
4541       info.tracks_ok);
4542 
4543   /* perhaps we should bail out if tracks_ok is FALSE and wait for more data?
4544    * (we would need new API to signal this properly and prevent other
4545    * typefinders from taking over the decision then) */
4546   is_audio = (info.audio > 0 && info.video == 0 && info.other == 0);
4547 
4548   if (info.doctype == EBML_DOCTYPE_WEBM) {
4549     type_name = (is_audio) ? "audio/webm" : "video/webm";
4550   } else if (info.video > 0 && info.video_stereo) {
4551     type_name = "video/x-matroska-3d";
4552   } else {
4553     type_name = (is_audio) ? "audio/x-matroska" : "video/x-matroska";
4554   }
4555 
4556   if (info.doctype == EBML_DOCTYPE_UNKNOWN)
4557     prob = GST_TYPE_FIND_LIKELY;
4558   else
4559     prob = GST_TYPE_FIND_MAXIMUM;
4560 
4561   gst_type_find_suggest_simple (tf, prob, type_name, NULL);
4562 }
4563 
4564 /*** application/mxf ***/
4565 static GstStaticCaps mxf_caps = GST_STATIC_CAPS ("application/mxf");
4566 
4567 #define MXF_MAX_PROBE_LENGTH (1024 * 64)
4568 #define MXF_CAPS (gst_static_caps_get(&mxf_caps))
4569 
4570 /*
4571  * MXF files start with a header partition pack key of 16 bytes which is defined
4572  * at SMPTE-377M 6.1. Before this there can be up to 64K of run-in which _must_
4573  * not contain the partition pack key.
4574  */
4575 static void
mxf_type_find(GstTypeFind * tf,gpointer ununsed)4576 mxf_type_find (GstTypeFind * tf, gpointer ununsed)
4577 {
4578   static const guint8 partition_pack_key[] =
4579       { 0x06, 0x0e, 0x2b, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0d, 0x01, 0x02, 0x01,
4580     0x01
4581   };
4582   DataScanCtx c = { 0, NULL, 0 };
4583 
4584   while (c.offset <= MXF_MAX_PROBE_LENGTH) {
4585     guint i;
4586     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 1024)))
4587       break;
4588 
4589     /* look over in chunks of 1kbytes to avoid too much overhead */
4590 
4591     for (i = 0; i < 1024 - 16; i++) {
4592       /* Check first byte before calling more expensive memcmp function */
4593       if (G_UNLIKELY (c.data[i] == 0x06
4594               && memcmp (c.data + i, partition_pack_key, 13) == 0)) {
4595         /* Header partition pack? */
4596         if (c.data[i + 13] != 0x02)
4597           goto advance;
4598 
4599         /* Partition status */
4600         if (c.data[i + 14] >= 0x05)
4601           goto advance;
4602 
4603         /* Reserved, must be 0x00 */
4604         if (c.data[i + 15] != 0x00)
4605           goto advance;
4606 
4607         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MXF_CAPS);
4608         return;
4609       }
4610     }
4611 
4612   advance:
4613     data_scan_ctx_advance (tf, &c, 1024 - 16);
4614   }
4615 }
4616 
4617 /*** video/x-dv ***/
4618 
4619 static GstStaticCaps dv_caps = GST_STATIC_CAPS ("video/x-dv, "
4620     "systemstream = (boolean) true");
4621 #define DV_CAPS (gst_static_caps_get(&dv_caps))
4622 static void
dv_type_find(GstTypeFind * tf,gpointer private)4623 dv_type_find (GstTypeFind * tf, gpointer private)
4624 {
4625   const guint8 *data;
4626 
4627   data = gst_type_find_peek (tf, 0, 5);
4628 
4629   /* check for DIF  and DV flag */
4630   if (data && (data[0] == 0x1f) && (data[1] == 0x07) && (data[2] == 0x00)) {
4631     const gchar *format;
4632 
4633     if (data[3] & 0x80) {
4634       format = "PAL";
4635     } else {
4636       format = "NTSC";
4637     }
4638 
4639     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, "video/x-dv",
4640         "systemstream", G_TYPE_BOOLEAN, TRUE,
4641         "format", G_TYPE_STRING, format, NULL);
4642   }
4643 }
4644 
4645 
4646 /*** Ogg variants ***/
4647 static GstStaticCaps ogg_caps =
4648     GST_STATIC_CAPS ("application/ogg;video/ogg;audio/ogg;application/kate");
4649 
4650 #define OGG_CAPS (gst_static_caps_get(&ogg_caps))
4651 
/* Kinds of logical streams counted by the Ogg typefinder. The values are
 * used as array indices, so they must stay contiguous and start at 0;
 * OGG_NUM is the number of kinds and must remain the last entry. */
typedef enum
{
  OGG_AUDIO = 0,
  OGG_VIDEO = 1,
  OGG_KATE = 2,
  OGG_OTHER = 3,
  OGG_SKELETON = 4,
  OGG_ANNODEX = 5,
  OGG_NUM
} GstOggStreamType;
4662 
4663 static void
ogganx_type_find(GstTypeFind * tf,gpointer private)4664 ogganx_type_find (GstTypeFind * tf, gpointer private)
4665 {
4666   const gchar *media_type;
4667   DataScanCtx c = { 0, NULL, 0 };
4668   guint ogg_syncs = 0;
4669   guint hdr_count[OGG_NUM] = { 0, };
4670   static const struct
4671   {
4672     const gchar marker[10];
4673     guint8 marker_size;
4674     GstOggStreamType stream_type;
4675   } markers[] = {
4676     {
4677     "\001vorbis", 7, OGG_AUDIO}, {
4678     "\200theora", 7, OGG_VIDEO}, {
4679     "fLaC", 4, OGG_AUDIO}, {
4680     "\177FLAC", 5, OGG_AUDIO}, {
4681     "Speex", 5, OGG_AUDIO}, {
4682     "CMML\0\0\0\0", 8, OGG_OTHER}, {
4683     "PCM     ", 8, OGG_AUDIO}, {
4684     "Annodex", 7, OGG_ANNODEX}, {
4685     "fishead", 7, OGG_SKELETON}, {
4686     "AnxData", 7, OGG_ANNODEX}, {
4687     "CELT    ", 8, OGG_AUDIO}, {
4688     "\200kate\0\0\0", 8, OGG_KATE}, {
4689     "BBCD\0", 5, OGG_VIDEO}, {
4690     "OVP80\1\1", 7, OGG_VIDEO}, {
4691     "OpusHead", 8, OGG_AUDIO}, {
4692     "\001audio\0\0\0", 9, OGG_AUDIO}, {
4693     "\001video\0\0\0", 9, OGG_VIDEO}, {
4694     "\001text\0\0\0", 9, OGG_OTHER}
4695   };
4696 
4697   while (c.offset < 4096 && data_scan_ctx_ensure_data (tf, &c, 64)) {
4698     guint size, i;
4699 
4700     if (memcmp (c.data, "OggS", 5) != 0)
4701       break;
4702 
4703     ++ogg_syncs;
4704 
4705     /* check if BOS */
4706     if (c.data[5] != 0x02)
4707       break;
4708 
4709     /* headers should only have one segment */
4710     if (c.data[26] != 1)
4711       break;
4712 
4713     size = c.data[27];
4714     if (size < 8)
4715       break;
4716 
4717     data_scan_ctx_advance (tf, &c, 28);
4718 
4719     if (!data_scan_ctx_ensure_data (tf, &c, MAX (size, 8)))
4720       break;
4721 
4722     for (i = 0; i < G_N_ELEMENTS (markers); ++i) {
4723       if (memcmp (c.data, markers[i].marker, markers[i].marker_size) == 0) {
4724         ++hdr_count[markers[i].stream_type];
4725         break;
4726       }
4727     }
4728 
4729     if (i == G_N_ELEMENTS (markers)) {
4730       GST_MEMDUMP ("unknown Ogg stream marker", c.data, size);
4731       ++hdr_count[OGG_OTHER];
4732     }
4733 
4734     data_scan_ctx_advance (tf, &c, size);
4735   }
4736 
4737   if (ogg_syncs == 0)
4738     return;
4739 
4740   /* We don't bother with annodex types. FIXME: what about XSPF? */
4741   if (hdr_count[OGG_VIDEO] > 0) {
4742     media_type = "video/ogg";
4743   } else if (hdr_count[OGG_AUDIO] > 0) {
4744     media_type = "audio/ogg";
4745   } else if (hdr_count[OGG_KATE] > 0 && hdr_count[OGG_OTHER] == 0) {
4746     media_type = "application/kate";
4747   } else {
4748     media_type = "application/ogg";
4749   }
4750 
4751   GST_INFO ("found %s (audio:%u, video:%u, annodex:%u, skeleton:%u, other:%u)",
4752       media_type, hdr_count[OGG_AUDIO], hdr_count[OGG_VIDEO],
4753       hdr_count[OGG_ANNODEX], hdr_count[OGG_SKELETON], hdr_count[OGG_OTHER]);
4754 
4755   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, media_type, NULL);
4756 }
4757 
4758 /*** audio/x-vorbis ***/
4759 static GstStaticCaps vorbis_caps = GST_STATIC_CAPS ("audio/x-vorbis");
4760 
4761 #define VORBIS_CAPS (gst_static_caps_get(&vorbis_caps))
4762 static void
vorbis_type_find(GstTypeFind * tf,gpointer private)4763 vorbis_type_find (GstTypeFind * tf, gpointer private)
4764 {
4765   const guint8 *data = gst_type_find_peek (tf, 0, 30);
4766 
4767   if (data) {
4768     guint blocksize_0;
4769     guint blocksize_1;
4770 
4771     /* 1 byte packet type (identification=0x01)
4772        6 byte string "vorbis"
4773        4 byte vorbis version */
4774     if (memcmp (data, "\001vorbis\000\000\000\000", 11) != 0)
4775       return;
4776     data += 11;
4777     /* 1 byte channels must be != 0 */
4778     if (data[0] == 0)
4779       return;
4780     data++;
4781     /* 4 byte samplerate must be != 0 */
4782     if (GST_READ_UINT32_LE (data) == 0)
4783       return;
4784     data += 16;
4785     /* blocksize checks */
4786     blocksize_0 = data[0] & 0x0F;
4787     blocksize_1 = (data[0] & 0xF0) >> 4;
4788     if (blocksize_0 > blocksize_1)
4789       return;
4790     if (blocksize_0 < 6 || blocksize_0 > 13)
4791       return;
4792     if (blocksize_1 < 6 || blocksize_1 > 13)
4793       return;
4794     data++;
4795     /* framing bit */
4796     if ((data[0] & 0x01) != 1)
4797       return;
4798     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, VORBIS_CAPS);
4799   }
4800 }
4801 
4802 /*** video/x-theora ***/
4803 
4804 static GstStaticCaps theora_caps = GST_STATIC_CAPS ("video/x-theora");
4805 
4806 #define THEORA_CAPS (gst_static_caps_get(&theora_caps))
4807 static void
theora_type_find(GstTypeFind * tf,gpointer private)4808 theora_type_find (GstTypeFind * tf, gpointer private)
4809 {
4810   const guint8 *data = gst_type_find_peek (tf, 0, 7);   //42);
4811 
4812   if (data) {
4813     if (data[0] != 0x80)
4814       return;
4815     if (memcmp (&data[1], "theora", 6) != 0)
4816       return;
4817     /* FIXME: make this more reliable when specs are out */
4818 
4819     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, THEORA_CAPS);
4820   }
4821 }
4822 
4823 /*** kate ***/
4824 static void
kate_type_find(GstTypeFind * tf,gpointer private)4825 kate_type_find (GstTypeFind * tf, gpointer private)
4826 {
4827   const guint8 *data = gst_type_find_peek (tf, 0, 64);
4828   gchar category[16] = { 0, };
4829 
4830   if (G_UNLIKELY (data == NULL))
4831     return;
4832 
4833   /* see: http://wiki.xiph.org/index.php/OggKate#Format_specification */
4834   if (G_LIKELY (memcmp (data, "\200kate\0\0\0", 8) != 0))
4835     return;
4836 
4837   /* make sure we always have a NUL-terminated string */
4838   memcpy (category, data + 48, 15);
4839   GST_LOG ("kate category: %s", category);
4840   /* canonical categories for subtitles: subtitles, spu-subtitles, SUB, K-SPU */
4841   if (strcmp (category, "subtitles") == 0 || strcmp (category, "SUB") == 0 ||
4842       strcmp (category, "spu-subtitles") == 0 ||
4843       strcmp (category, "K-SPU") == 0) {
4844     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
4845         "subtitle/x-kate", NULL);
4846   } else {
4847     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
4848         "application/x-kate", NULL);
4849   }
4850 }
4851 
/*** WebVTT subtitles ***/
4853 static GstStaticCaps webvtt_caps =
4854 GST_STATIC_CAPS ("application/x-subtitle-vtt, parsed=(boolean)false");
4855 #define WEBVTT_CAPS (gst_static_caps_get(&webvtt_caps))
4856 
4857 static void
webvtt_type_find(GstTypeFind * tf,gpointer private)4858 webvtt_type_find (GstTypeFind * tf, gpointer private)
4859 {
4860   const guint8 *data;
4861 
4862   data = gst_type_find_peek (tf, 0, 10);
4863 
4864   if (data == NULL)
4865     return;
4866 
4867   /* there might be a UTF-8 BOM at the beginning */
4868   if (memcmp (data, "WEBVTT", 6) != 0 && memcmp (data + 3, "WEBVTT", 6) != 0) {
4869     return;
4870   }
4871 
4872   if (data[0] != 'W') {
4873     if (data[0] != 0xef || data[1] != 0xbb || data[2] != 0xbf)
4874       return;                   /* Not a UTF-8 BOM */
4875     data += 3;
4876   }
4877 
4878   /* After the WEBVTT magic must be one of these chars:
4879    *   0x20 (space), 0x9 (tab), 0xa (LF) or 0xd (CR) */
4880   if (data[6] != 0x20 && data[6] != 0x9 && data[6] != 0xa && data[6] != 0xd) {
4881     return;
4882   }
4883 
4884   gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, WEBVTT_CAPS);
4885 }
4886 
4887 /*** application/x-ogm-video or audio***/
4888 
4889 static GstStaticCaps ogmvideo_caps =
4890 GST_STATIC_CAPS ("application/x-ogm-video");
4891 #define OGMVIDEO_CAPS (gst_static_caps_get(&ogmvideo_caps))
4892 static void
ogmvideo_type_find(GstTypeFind * tf,gpointer private)4893 ogmvideo_type_find (GstTypeFind * tf, gpointer private)
4894 {
4895   const guint8 *data = gst_type_find_peek (tf, 0, 9);
4896 
4897   if (data) {
4898     if (memcmp (data, "\001video\000\000\000", 9) != 0)
4899       return;
4900     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMVIDEO_CAPS);
4901   }
4902 }
4903 
4904 static GstStaticCaps ogmaudio_caps =
4905 GST_STATIC_CAPS ("application/x-ogm-audio");
4906 #define OGMAUDIO_CAPS (gst_static_caps_get(&ogmaudio_caps))
4907 static void
ogmaudio_type_find(GstTypeFind * tf,gpointer private)4908 ogmaudio_type_find (GstTypeFind * tf, gpointer private)
4909 {
4910   const guint8 *data = gst_type_find_peek (tf, 0, 9);
4911 
4912   if (data) {
4913     if (memcmp (data, "\001audio\000\000\000", 9) != 0)
4914       return;
4915     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMAUDIO_CAPS);
4916   }
4917 }
4918 
4919 static GstStaticCaps ogmtext_caps = GST_STATIC_CAPS ("application/x-ogm-text");
4920 
4921 #define OGMTEXT_CAPS (gst_static_caps_get(&ogmtext_caps))
4922 static void
ogmtext_type_find(GstTypeFind * tf,gpointer private)4923 ogmtext_type_find (GstTypeFind * tf, gpointer private)
4924 {
4925   const guint8 *data = gst_type_find_peek (tf, 0, 9);
4926 
4927   if (data) {
4928     if (memcmp (data, "\001text\000\000\000\000", 9) != 0)
4929       return;
4930     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMTEXT_CAPS);
4931   }
4932 }
4933 
4934 /*** audio/x-speex ***/
4935 
4936 static GstStaticCaps speex_caps = GST_STATIC_CAPS ("audio/x-speex");
4937 
4938 #define SPEEX_CAPS (gst_static_caps_get(&speex_caps))
4939 static void
speex_type_find(GstTypeFind * tf,gpointer private)4940 speex_type_find (GstTypeFind * tf, gpointer private)
4941 {
4942   const guint8 *data = gst_type_find_peek (tf, 0, 80);
4943 
4944   if (data) {
4945     /* 8 byte string "Speex   "
4946        24 byte speex version string + int */
4947     if (memcmp (data, "Speex   ", 8) != 0)
4948       return;
4949     data += 32;
4950 
4951     /* 4 byte header size >= 80 */
4952     if (GST_READ_UINT32_LE (data) < 80)
4953       return;
4954     data += 4;
4955 
4956     /* 4 byte sample rate <= 48000 */
4957     if (GST_READ_UINT32_LE (data) > 48000)
4958       return;
4959     data += 4;
4960 
4961     /* currently there are only 3 speex modes. */
4962     if (GST_READ_UINT32_LE (data) > 3)
4963       return;
4964     data += 12;
4965 
4966     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SPEEX_CAPS);
4967   }
4968 }
4969 
4970 /*** audio/x-celt ***/
4971 
4972 static GstStaticCaps celt_caps = GST_STATIC_CAPS ("audio/x-celt");
4973 
4974 #define CELT_CAPS (gst_static_caps_get(&celt_caps))
4975 static void
celt_type_find(GstTypeFind * tf,gpointer private)4976 celt_type_find (GstTypeFind * tf, gpointer private)
4977 {
4978   const guint8 *data = gst_type_find_peek (tf, 0, 8);
4979 
4980   if (data) {
4981     /* 8 byte string "CELT   " */
4982     if (memcmp (data, "CELT    ", 8) != 0)
4983       return;
4984 
4985     /* TODO: Check other values of the CELT header */
4986     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, CELT_CAPS);
4987   }
4988 }
4989 
4990 /*** application/x-ogg-skeleton ***/
4991 static GstStaticCaps ogg_skeleton_caps =
4992 GST_STATIC_CAPS ("application/x-ogg-skeleton, parsed=(boolean)FALSE");
4993 #define OGG_SKELETON_CAPS (gst_static_caps_get(&ogg_skeleton_caps))
4994 static void
oggskel_type_find(GstTypeFind * tf,gpointer private)4995 oggskel_type_find (GstTypeFind * tf, gpointer private)
4996 {
4997   const guint8 *data = gst_type_find_peek (tf, 0, 12);
4998 
4999   if (data) {
5000     /* 8 byte string "fishead\0" for the ogg skeleton stream */
5001     if (memcmp (data, "fishead\0", 8) != 0)
5002       return;
5003     data += 8;
5004 
5005     /* Require that the header contains version 3.0 */
5006     if (GST_READ_UINT16_LE (data) != 3)
5007       return;
5008     data += 2;
5009     if (GST_READ_UINT16_LE (data) != 0)
5010       return;
5011 
5012     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGG_SKELETON_CAPS);
5013   }
5014 }
5015 
5016 static GstStaticCaps cmml_caps = GST_STATIC_CAPS ("text/x-cmml");
5017 
5018 #define CMML_CAPS (gst_static_caps_get(&cmml_caps))
5019 static void
cmml_type_find(GstTypeFind * tf,gpointer private)5020 cmml_type_find (GstTypeFind * tf, gpointer private)
5021 {
5022   /* Header is 12 bytes minimum (though we don't check the minor version */
5023   const guint8 *data = gst_type_find_peek (tf, 0, 12);
5024 
5025   if (data) {
5026 
5027     /* 8 byte string "CMML\0\0\0\0" for the magic number */
5028     if (memcmp (data, "CMML\0\0\0\0", 8) != 0)
5029       return;
5030     data += 8;
5031 
5032     /* Require that the header contains at least version 2.0 */
5033     if (GST_READ_UINT16_LE (data) < 2)
5034       return;
5035 
5036     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, CMML_CAPS);
5037   }
5038 }
5039 
5040 /*** application/x-tar ***/
5041 
5042 static GstStaticCaps tar_caps = GST_STATIC_CAPS ("application/x-tar");
5043 
5044 #define TAR_CAPS (gst_static_caps_get(&tar_caps))
5045 #define OLDGNU_MAGIC "ustar  "  /* 7 chars and a NUL */
5046 #define NEWGNU_MAGIC "ustar"    /* 5 chars and a NUL */
5047 static void
tar_type_find(GstTypeFind * tf,gpointer unused)5048 tar_type_find (GstTypeFind * tf, gpointer unused)
5049 {
5050   const guint8 *data = gst_type_find_peek (tf, 257, 8);
5051 
5052   /* of course we are not certain, but we don't want other typefind funcs
5053    * to detect formats of files within the tar archive, e.g. mp3s */
5054   if (data) {
5055     if (memcmp (data, OLDGNU_MAGIC, 8) == 0) {  /* sic */
5056       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, TAR_CAPS);
5057     } else if (memcmp (data, NEWGNU_MAGIC, 6) == 0 &&   /* sic */
5058         g_ascii_isdigit (data[6]) && g_ascii_isdigit (data[7])) {
5059       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, TAR_CAPS);
5060     }
5061   }
5062 }
5063 
5064 /*** application/x-ar ***/
5065 
5066 static GstStaticCaps ar_caps = GST_STATIC_CAPS ("application/x-ar");
5067 
5068 #define AR_CAPS (gst_static_caps_get(&ar_caps))
5069 static void
ar_type_find(GstTypeFind * tf,gpointer unused)5070 ar_type_find (GstTypeFind * tf, gpointer unused)
5071 {
5072   const guint8 *data = gst_type_find_peek (tf, 0, 24);
5073 
5074   if (data && memcmp (data, "!<arch>", 7) == 0) {
5075     gint i;
5076 
5077     for (i = 7; i < 24; ++i) {
5078       if (!g_ascii_isprint (data[i]) && data[i] != '\n') {
5079         gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, AR_CAPS);
5080       }
5081     }
5082 
5083     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, AR_CAPS);
5084   }
5085 }
5086 
5087 /*** audio/x-au ***/
5088 
5089 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5090  * as it is only possible to register one typefind factory per 'name'
5091  * (which is in this case the caps), and the first one would be replaced by
5092  * the second one. */
5093 static GstStaticCaps au_caps = GST_STATIC_CAPS ("audio/x-au");
5094 
5095 #define AU_CAPS (gst_static_caps_get(&au_caps))
5096 static void
au_type_find(GstTypeFind * tf,gpointer unused)5097 au_type_find (GstTypeFind * tf, gpointer unused)
5098 {
5099   const guint8 *data = gst_type_find_peek (tf, 0, 4);
5100 
5101   if (data) {
5102     if (memcmp (data, ".snd", 4) == 0 || memcmp (data, "dns.", 4) == 0) {
5103       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, AU_CAPS);
5104     }
5105   }
5106 }
5107 
5108 
5109 /*** video/x-nuv ***/
5110 
5111 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5112  * as it is only possible to register one typefind factory per 'name'
5113  * (which is in this case the caps), and the first one would be replaced by
5114  * the second one. */
5115 static GstStaticCaps nuv_caps = GST_STATIC_CAPS ("video/x-nuv");
5116 
5117 #define NUV_CAPS (gst_static_caps_get(&nuv_caps))
5118 static void
nuv_type_find(GstTypeFind * tf,gpointer unused)5119 nuv_type_find (GstTypeFind * tf, gpointer unused)
5120 {
5121   const guint8 *data = gst_type_find_peek (tf, 0, 11);
5122 
5123   if (data) {
5124     if (memcmp (data, "MythTVVideo", 11) == 0
5125         || memcmp (data, "NuppelVideo", 11) == 0) {
5126       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, NUV_CAPS);
5127     }
5128   }
5129 }
5130 
5131 /*** audio/x-paris ***/
5132 /* NOTE: do not replace this function with two TYPE_FIND_REGISTER_START_WITH */
5133 static GstStaticCaps paris_caps = GST_STATIC_CAPS ("audio/x-paris");
5134 
5135 #define PARIS_CAPS (gst_static_caps_get(&paris_caps))
5136 static void
paris_type_find(GstTypeFind * tf,gpointer unused)5137 paris_type_find (GstTypeFind * tf, gpointer unused)
5138 {
5139   const guint8 *data = gst_type_find_peek (tf, 0, 4);
5140 
5141   if (data) {
5142     if (memcmp (data, " paf", 4) == 0 || memcmp (data, "fap ", 4) == 0) {
5143       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, PARIS_CAPS);
5144     }
5145   }
5146 }
5147 
5148 /*** audio/x-sbc ***/
5149 static GstStaticCaps sbc_caps = GST_STATIC_CAPS ("audio/x-sbc");
5150 #define SBC_CAPS (gst_static_caps_get(&sbc_caps))
5151 
/* sbc_check_header:
 * @data: start of a candidate SBC frame
 * @len: number of valid bytes at @data
 * @rate: (out) sample rate taken from the header
 * @channels: (out) 1 if the channel mode field is 0, otherwise 2
 *
 * Checks the sync byte (0x9C) and bitpool of an SBC frame header and
 * computes the frame length from the block count, channel mode, subband
 * count and bitpool fields.
 *
 * Returns: the frame length in bytes, or 0 if fewer than 4 bytes are
 * available or the header is not valid (in which case @rate and @channels
 * may be left unset).
 */
static gsize
sbc_check_header (const guint8 * data, gsize len, guint * rate,
    guint * channels)
{
  static const guint16 sbc_rates[4] = { 16000, 32000, 44100, 48000 };
  static const guint8 sbc_blocks[4] = { 4, 8, 12, 16 };
  guint n_blocks, ch_mode, n_subbands, bitpool;

  /* check the length first so that no byte is read from a short buffer */
  if (len < 4 || data[0] != 0x9C)
    return 0;

  /* byte 1: bits 7-6 rate index, bits 5-4 block count index,
   * bits 3-2 channel mode, bit 0 subband count; byte 2: bitpool */
  n_blocks = sbc_blocks[(data[1] >> 4) & 0x03];
  ch_mode = (data[1] >> 2) & 0x03;
  n_subbands = (data[1] & 0x01) ? 8 : 4;
  bitpool = data[2];
  if (bitpool < 2)
    return 0;

  *rate = sbc_rates[(data[1] >> 6) & 0x03];
  *channels = (ch_mode == 0) ? 1 : 2;

  if (ch_mode == 0)
    return 4 + (n_subbands * 1) / 2 + (n_blocks * 1 * bitpool) / 8;
  else if (ch_mode == 1)
    return 4 + (n_subbands * 2) / 2 + (n_blocks * 2 * bitpool) / 8;
  else if (ch_mode == 2)
    return 4 + (n_subbands * 2) / 2 + (n_blocks * bitpool) / 8;
  else if (ch_mode == 3)
    return 4 + (n_subbands * 2) / 2 + (n_subbands + n_blocks * bitpool) / 8;

  return 0;
}
5184 
5185 static void
sbc_type_find(GstTypeFind * tf,gpointer unused)5186 sbc_type_find (GstTypeFind * tf, gpointer unused)
5187 {
5188   const guint8 *data;
5189   gsize frame_len;
5190   guint i, rate, channels, offset = 0;
5191 
5192   for (i = 0; i < 10; ++i) {
5193     data = gst_type_find_peek (tf, offset, 8);
5194     if (data == NULL)
5195       return;
5196 
5197     frame_len = sbc_check_header (data, 8, &rate, &channels);
5198     if (frame_len == 0)
5199       return;
5200 
5201     offset += frame_len;
5202   }
5203   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE, "audio/x-sbc",
5204       "rate", G_TYPE_INT, rate, "channels", G_TYPE_INT, channels,
5205       "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
5206 }
5207 
5208 /*** audio/iLBC-sh ***/
5209 /* NOTE: do not replace this function with two TYPE_FIND_REGISTER_START_WITH */
5210 static GstStaticCaps ilbc_caps = GST_STATIC_CAPS ("audio/iLBC-sh");
5211 
5212 #define ILBC_CAPS (gst_static_caps_get(&ilbc_caps))
5213 static void
ilbc_type_find(GstTypeFind * tf,gpointer unused)5214 ilbc_type_find (GstTypeFind * tf, gpointer unused)
5215 {
5216   const guint8 *data = gst_type_find_peek (tf, 0, 8);
5217 
5218   if (data) {
5219     if (memcmp (data, "#!iLBC30", 8) == 0 || memcmp (data, "#!iLBC20", 8) == 0) {
5220       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, ILBC_CAPS);
5221     }
5222   }
5223 }
5224 
5225 /*** application/x-ms-dos-executable ***/
5226 
5227 static GstStaticCaps msdos_caps =
5228 GST_STATIC_CAPS ("application/x-ms-dos-executable");
5229 #define MSDOS_CAPS (gst_static_caps_get(&msdos_caps))
5230 /* see http://www.madchat.org/vxdevl/papers/winsys/pefile/pefile.htm */
5231 static void
msdos_type_find(GstTypeFind * tf,gpointer unused)5232 msdos_type_find (GstTypeFind * tf, gpointer unused)
5233 {
5234   const guint8 *data = gst_type_find_peek (tf, 0, 64);
5235 
5236   if (data && data[0] == 'M' && data[1] == 'Z' &&
5237       GST_READ_UINT16_LE (data + 8) == 4) {
5238     guint32 pe_offset = GST_READ_UINT32_LE (data + 60);
5239 
5240     data = gst_type_find_peek (tf, pe_offset, 2);
5241     if (data && data[0] == 'P' && data[1] == 'E') {
5242       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, MSDOS_CAPS);
5243     }
5244   }
5245 }
5246 
5247 /*** application/x-mmsh ***/
5248 
5249 static GstStaticCaps mmsh_caps = GST_STATIC_CAPS ("application/x-mmsh");
5250 
5251 #define MMSH_CAPS gst_static_caps_get(&mmsh_caps)
5252 
/* This is to recognise mmsh-over-http */
5254 static void
mmsh_type_find(GstTypeFind * tf,gpointer unused)5255 mmsh_type_find (GstTypeFind * tf, gpointer unused)
5256 {
5257   static const guint8 asf_marker[16] = { 0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66,
5258     0xcf, 0x11, 0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
5259   };
5260 
5261   const guint8 *data;
5262 
5263   data = gst_type_find_peek (tf, 0, 2 + 2 + 4 + 2 + 2 + 16);
5264   if (data && data[0] == 0x24 && data[1] == 0x48 &&
5265       GST_READ_UINT16_LE (data + 2) > 2 + 2 + 4 + 2 + 2 + 16 &&
5266       memcmp (data + 2 + 2 + 4 + 2 + 2, asf_marker, 16) == 0) {
5267     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, MMSH_CAPS);
5268   }
5269 }
5270 
5271 /*** video/x-dirac ***/
5272 
5273 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5274  * as it is only possible to register one typefind factory per 'name'
5275  * (which is in this case the caps), and the first one would be replaced by
5276  * the second one. */
5277 static GstStaticCaps dirac_caps = GST_STATIC_CAPS ("video/x-dirac");
5278 
5279 #define DIRAC_CAPS (gst_static_caps_get(&dirac_caps))
5280 static void
dirac_type_find(GstTypeFind * tf,gpointer unused)5281 dirac_type_find (GstTypeFind * tf, gpointer unused)
5282 {
5283   const guint8 *data = gst_type_find_peek (tf, 0, 8);
5284 
5285   if (data) {
5286     if (memcmp (data, "BBCD", 4) == 0 || memcmp (data, "KW-DIRAC", 8) == 0) {
5287       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, DIRAC_CAPS);
5288     }
5289   }
5290 }
5291 
5292 /*** audio/x-tap-tap ***/
5293 
5294 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5295  * as it is only possible to register one typefind factory per 'name'
5296  * (which is in this case the caps), and the first one would be replaced by
5297  * the second one. */
5298 static GstStaticCaps tap_caps = GST_STATIC_CAPS ("audio/x-tap-tap");
5299 
5300 #define TAP_CAPS (gst_static_caps_get(&tap_caps))
5301 static void
tap_type_find(GstTypeFind * tf,gpointer unused)5302 tap_type_find (GstTypeFind * tf, gpointer unused)
5303 {
5304   const guint8 *data = gst_type_find_peek (tf, 0, 16);
5305 
5306   if (data) {
5307     if (memcmp (data, "C64-TAPE-RAW", 12) == 0
5308         || memcmp (data, "C16-TAPE-RAW", 12) == 0) {
5309       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TAP_CAPS);
5310     }
5311   }
5312 }
5313 
5314 /*** video/vivo ***/
5315 
5316 static GstStaticCaps vivo_caps = GST_STATIC_CAPS ("video/vivo");
5317 
5318 #define VIVO_CAPS gst_static_caps_get(&vivo_caps)
5319 
5320 static void
vivo_type_find(GstTypeFind * tf,gpointer unused)5321 vivo_type_find (GstTypeFind * tf, gpointer unused)
5322 {
5323   static const guint8 vivo_marker[] = { 'V', 'e', 'r', 's', 'i', 'o', 'n',
5324     ':', 'V', 'i', 'v', 'o', '/'
5325   };
5326   const guint8 *data;
5327   guint hdr_len, pos;
5328 
5329   data = gst_type_find_peek (tf, 0, 1024);
5330   if (data == NULL || data[0] != 0x00)
5331     return;
5332 
5333   if ((data[1] & 0x80)) {
5334     if ((data[2] & 0x80))
5335       return;
5336     hdr_len = ((guint) (data[1] & 0x7f)) << 7;
5337     hdr_len += data[2];
5338     if (hdr_len > 2048)
5339       return;
5340     pos = 3;
5341   } else {
5342     hdr_len = data[1];
5343     pos = 2;
5344   }
5345 
5346   /* 1008 = 1022 - strlen ("Version:Vivo/") - 1 */
5347   while (pos < 1008 && data[pos] == '\r' && data[pos + 1] == '\n')
5348     pos += 2;
5349 
5350   if (memcmp (data + pos, vivo_marker, sizeof (vivo_marker)) == 0) {
5351     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, VIVO_CAPS);
5352   }
5353 }
5354 
5355 /*** XDG MIME typefinder (to avoid false positives mostly) ***/
5356 
5357 #ifdef USE_GIO
5358 static void
xdgmime_typefind(GstTypeFind * find,gpointer user_data)5359 xdgmime_typefind (GstTypeFind * find, gpointer user_data)
5360 {
5361   gchar *mimetype;
5362   gsize length = 16384;
5363   guint64 tf_length;
5364   const guint8 *data;
5365   gchar *tmp;
5366 
5367   if ((tf_length = gst_type_find_get_length (find)) > 0)
5368     length = MIN (length, tf_length);
5369 
5370   if ((data = gst_type_find_peek (find, 0, length)) == NULL)
5371     return;
5372 
5373   tmp = g_content_type_guess (NULL, data, length, NULL);
5374   if (tmp == NULL || g_content_type_is_unknown (tmp)) {
5375     g_free (tmp);
5376     return;
5377   }
5378 
5379   mimetype = g_content_type_get_mime_type (tmp);
5380   g_free (tmp);
5381 
5382   if (mimetype == NULL)
5383     return;
5384 
5385   GST_DEBUG ("Got mimetype '%s'", mimetype);
5386 
5387   /* Ignore audio/video types:
5388    *  - our own typefinders in -base are likely to be better at this
5389    *    (and if they're not, we really want to fix them, that's why we don't
5390    *    report xdg-detected audio/video types at all, not even with a low
5391    *    probability)
5392    *  - we want to detect GStreamer media types and not MIME types
5393    *  - the purpose of this xdg mime finder is mainly to prevent false
5394    *    positives of non-media formats, not to typefind audio/video formats */
5395   if (g_str_has_prefix (mimetype, "audio/") ||
5396       g_str_has_prefix (mimetype, "video/")) {
5397     GST_LOG ("Ignoring audio/video mime type");
5398     g_free (mimetype);
5399     return;
5400   }
5401 
5402   /* Again, we mainly want the xdg typefinding to prevent false-positives on
5403    * non-media formats, so suggest the type with a probability that trumps
5404    * uncertain results of our typefinders, but not more than that. */
5405   GST_LOG ("Suggesting '%s' with probability POSSIBLE", mimetype);
5406   gst_type_find_suggest_simple (find, GST_TYPE_FIND_POSSIBLE, mimetype, NULL);
5407   g_free (mimetype);
5408 }
5409 #endif /* USE_GIO */
5410 
5411 /*** Windows icon typefinder (to avoid false positives mostly) ***/
5412 
5413 static void
windows_icon_typefind(GstTypeFind * find,gpointer user_data)5414 windows_icon_typefind (GstTypeFind * find, gpointer user_data)
5415 {
5416   const guint8 *data;
5417   gint64 datalen;
5418   guint16 type, nimages;
5419   gint32 size, offset;
5420 
5421   datalen = gst_type_find_get_length (find);
5422   if (datalen < 22)
5423     return;
5424   if ((data = gst_type_find_peek (find, 0, 6)) == NULL)
5425     return;
5426 
5427   /* header - simple and not enough to rely on it alone */
5428   if (GST_READ_UINT16_LE (data) != 0)
5429     return;
5430   type = GST_READ_UINT16_LE (data + 2);
5431   if (type != 1 && type != 2)
5432     return;
5433   nimages = GST_READ_UINT16_LE (data + 4);
5434   if (nimages == 0)             /* we can assume we can't have an empty image file ? */
5435     return;
5436 
5437   /* first image */
5438   if (data[6 + 3] != 0)
5439     return;
5440   if (type == 1) {
5441     guint16 planes = GST_READ_UINT16_LE (data + 6 + 4);
5442     if (planes > 1)
5443       return;
5444   }
5445   size = GST_READ_UINT32_LE (data + 6 + 8);
5446   offset = GST_READ_UINT32_LE (data + 6 + 12);
5447   if (offset < 0 || size <= 0 || size >= datalen || offset >= datalen
5448       || size + offset > datalen)
5449     return;
5450 
5451   gst_type_find_suggest_simple (find, GST_TYPE_FIND_NEARLY_CERTAIN,
5452       "image/x-icon", NULL);
5453 }
5454 
5455 /*** WAP WBMP typefinder ***/
5456 
5457 static void
wbmp_typefind(GstTypeFind * find,gpointer user_data)5458 wbmp_typefind (GstTypeFind * find, gpointer user_data)
5459 {
5460   const guint8 *data;
5461   gint64 datalen;
5462   guint w, h, size;
5463 
5464   /* http://en.wikipedia.org/wiki/Wireless_Application_Protocol_Bitmap_Format */
5465   datalen = gst_type_find_get_length (find);
5466   if (datalen == 0)
5467     return;
5468 
5469   data = gst_type_find_peek (find, 0, 5);
5470   if (data == NULL)
5471     return;
5472 
5473   /* want 0x00 0x00 at start */
5474   if (*data++ != 0 || *data++ != 0)
5475     return;
5476 
5477   /* min header size */
5478   size = 4;
5479 
5480   /* let's assume max width/height is 65536 */
5481   w = *data++;
5482   if ((w & 0x80)) {
5483     w = (w << 8) | *data++;
5484     if ((w & 0x80))
5485       return;
5486     ++size;
5487     data = gst_type_find_peek (find, 4, 2);
5488     if (data == NULL)
5489       return;
5490   }
5491   h = *data++;
5492   if ((h & 0x80)) {
5493     h = (h << 8) | *data++;
5494     if ((h & 0x80))
5495       return;
5496     ++size;
5497   }
5498 
5499   if (w == 0 || h == 0)
5500     return;
5501 
5502   /* now add bitmap size */
5503   size += h * (GST_ROUND_UP_8 (w) / 8);
5504 
5505   if (datalen == size) {
5506     gst_type_find_suggest_simple (find, GST_TYPE_FIND_POSSIBLE - 10,
5507         "image/vnd.wap.wbmp", NULL);
5508   }
5509 }
5510 
5511 /*** DEGAS Atari images (also to avoid false positives, see #625129) ***/
5512 static void
degas_type_find(GstTypeFind * tf,gpointer private)5513 degas_type_find (GstTypeFind * tf, gpointer private)
5514 {
5515   /* No magic, but it should have a fixed size and a few invalid values */
5516   /* http://www.fileformat.info/format/atari/spec/6ecf9f6eb5be494284a47feb8a214687/view.htm */
5517   gint64 len;
5518   const guint8 *data;
5519   guint16 resolution;
5520   int n;
5521 
5522   len = gst_type_find_get_length (tf);
5523   if (len < 34)                 /* smallest header of the lot */
5524     return;
5525   data = gst_type_find_peek (tf, 0, 4);
5526   if (G_UNLIKELY (data == NULL))
5527     return;
5528   resolution = GST_READ_UINT16_BE (data);
5529   if (len == 32034) {
5530     /* could be DEGAS */
5531     if (resolution <= 2)
5532       gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5533           "image/x-degas", NULL);
5534   } else if (len == 32066) {
5535     /* could be DEGAS Elite */
5536     if (resolution <= 2) {
5537       data = gst_type_find_peek (tf, len - 16, 8);
5538       if (G_UNLIKELY (data == NULL))
5539         return;
5540       for (n = 0; n < 4; n++) {
5541         if (GST_READ_UINT16_BE (data + n * 2) > 2)
5542           return;
5543       }
5544       gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5545           "image/x-degas", NULL);
5546     }
5547   } else if (len >= 66 && len < 32066) {
5548     /* could be compressed DEGAS Elite, but it's compressed and so we can't rely on size,
5549        it does have 4 16 bytes values near the end that are 0-2 though. */
5550     if ((resolution & 0x8000) && (resolution & 0x7fff) <= 2) {
5551       data = gst_type_find_peek (tf, len - 16, 8);
5552       if (G_UNLIKELY (data == NULL))
5553         return;
5554       for (n = 0; n < 4; n++) {
5555         if (GST_READ_UINT16_BE (data + n * 2) > 2)
5556           return;
5557       }
5558       gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5559           "image/x-degas", NULL);
5560     }
5561   }
5562 }
5563 
5564 /*** y4m ***/
5565 
5566 static void
y4m_typefind(GstTypeFind * tf,gpointer private)5567 y4m_typefind (GstTypeFind * tf, gpointer private)
5568 {
5569   const guint8 *data;
5570 
5571   data = gst_type_find_peek (tf, 0, 10);
5572   if (data != NULL && memcmp (data, "YUV4MPEG2 ", 10) == 0) {
5573     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY,
5574         "application/x-yuv4mpeg", "y4mversion", G_TYPE_INT, 2, NULL);
5575   }
5576 }
5577 
5578 /*** DVD ISO images (looks like H.264, see #674069) ***/
5579 static void
dvdiso_type_find(GstTypeFind * tf,gpointer private)5580 dvdiso_type_find (GstTypeFind * tf, gpointer private)
5581 {
5582   /* 0x8000 bytes of zeros, then "\001CD001" */
5583   gint64 len;
5584   const guint8 *data;
5585 
5586   len = gst_type_find_get_length (tf);
5587   if (len < 0x8006)
5588     return;
5589   data = gst_type_find_peek (tf, 0, 0x8006);
5590   if (G_UNLIKELY (data == NULL))
5591     return;
5592   for (len = 0; len < 0x8000; len++)
5593     if (data[len])
5594       return;
5595   /* Can the '1' be anything else ? My three samples all have '1'. */
5596   if (memcmp (data + 0x8000, "\001CD001", 6))
5597     return;
5598 
5599   /* May need more inspection, we may be able to demux some of them */
5600   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY,
5601       "application/octet-stream", NULL);
5602 }
5603 
5604 /* SSA/ASS subtitles
5605  *
5606  * http://en.wikipedia.org/wiki/SubStation_Alpha
5607  * http://matroska.org/technical/specs/subtitles/ssa.html
5608  */
5609 static void
ssa_type_find(GstTypeFind * tf,gpointer private)5610 ssa_type_find (GstTypeFind * tf, gpointer private)
5611 {
5612   const gchar *start, *end, *ver_str, *media_type = NULL;
5613   const guint8 *data;
5614   gchar *str, *script_type, *p = NULL;
5615   gint64 len;
5616 
5617   data = gst_type_find_peek (tf, 0, 32);
5618 
5619   if (data == NULL)
5620     return;
5621 
5622   /* FIXME: detect utf-16/32 BOM and convert before typefinding the rest */
5623 
5624   /* there might be a UTF-8 BOM at the beginning */
5625   if (memcmp (data, "[Script Info]", 13) != 0 &&
5626       memcmp (data + 3, "[Script Info]", 13) != 0) {
5627     return;
5628   }
5629 
5630   /* now check if we have SSA or ASS */
5631   len = gst_type_find_get_length (tf);
5632   if (len > 8192)
5633     len = 8192;
5634 
5635   data = gst_type_find_peek (tf, 0, len);
5636   if (data == NULL)
5637     return;
5638 
5639   /* skip BOM */
5640   start = (gchar *) memchr (data, '[', 5);
5641   g_assert (start);
5642   len -= (start - (gchar *) data);
5643 
5644   /* ignore anything non-UTF8 for now, in future we might at least allow
5645    * other UTF variants that are clearly prefixed with the appropriate BOM */
5646   if (!g_utf8_validate (start, len, &end) && (len - (end - start)) > 6) {
5647     GST_FIXME ("non-UTF8 SSA/ASS file");
5648     return;
5649   }
5650 
5651   /* something at start,  but not a UTF-8 BOM? */
5652   if (data[0] != '[' && (data[0] != 0xEF || data[1] != 0xBB || data[2] != 0xBF))
5653     return;
5654 
5655   /* ignore any partial UTF-8 characters at the end */
5656   len = end - start;
5657 
5658   /* create a NUL-terminated string so it's easier to process it safely */
5659   str = g_strndup (start, len - 1);
5660   script_type = strstr (str, "ScriptType:");
5661   if (script_type != NULL) {
5662     gdouble version;
5663 
5664     ver_str = script_type + 11;
5665     while (*ver_str == ' ' || *ver_str == 'v' || *ver_str == 'V')
5666       ++ver_str;
5667     version = g_ascii_strtod (ver_str, &p);
5668     if (version == 4.0 && p != NULL && *p == '+')
5669       media_type = "application/x-ass";
5670     else if (version >= 1.0 && version <= 4.0)
5671       media_type = "application/x-ssa";
5672   }
5673 
5674   if (media_type == NULL) {
5675     if (strstr (str, "[v4+ Styles]") || strstr (str, "[V4+ Styles]"))
5676       media_type = "application/x-ass";
5677     else if (strstr (str, "[v4 Styles]") || strstr (str, "[V4 Styles]"))
5678       media_type = "application/x-ssa";
5679   }
5680 
5681   if (media_type != NULL) {
5682     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
5683         media_type, "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
5684   } else {
5685     GST_WARNING ("could not detect SSA/ASS variant");
5686   }
5687 
5688   g_free (str);
5689 }
5690 
5691 /*** application/x-mcc ***/
5692 static GstStaticCaps mcc_caps = GST_STATIC_CAPS ("application/x-mcc");
5693 
5694 #define MCC_CAPS gst_static_caps_get(&mcc_caps)
5695 
5696 static void
mcc_type_find(GstTypeFind * tf,gpointer private)5697 mcc_type_find (GstTypeFind * tf, gpointer private)
5698 {
5699   const guint8 *data;
5700 
5701   data = gst_type_find_peek (tf, 0, 31);
5702 
5703   if (data == NULL)
5704     return;
5705 
5706   /* MCC files always start with this followed by the version */
5707   if (memcmp (data, "File Format=MacCaption_MCC V", 28) != 0 ||
5708       !g_ascii_isdigit (data[28]) || data[29] != '.' ||
5709       !g_ascii_isdigit (data[30])) {
5710     return;
5711   }
5712 
5713   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
5714       "application/x-mcc", "version", G_TYPE_INT, data[28] - '0', NULL);
5715 }
5716 
5717 /*** video/x-pva ***/
5718 
5719 static GstStaticCaps pva_caps = GST_STATIC_CAPS ("video/x-pva");
5720 
5721 #define PVA_CAPS gst_static_caps_get(&pva_caps)
5722 
5723 static void
pva_type_find(GstTypeFind * tf,gpointer private)5724 pva_type_find (GstTypeFind * tf, gpointer private)
5725 {
5726   const guint8 *data;
5727 
5728   data = gst_type_find_peek (tf, 0, 5);
5729 
5730   if (data == NULL)
5731     return;
5732 
5733   if (data[0] == 'A' && data[1] == 'V' && data[2] < 3 && data[4] == 0x55)
5734     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, PVA_CAPS);
5735 }
5736 
5737 /*** audio/audible ***/
5738 
5739 /* derived from pyaudibletags
5740  * http://code.google.com/p/pyaudibletags/source/browse/trunk/pyaudibletags.py
5741  */
5742 static GstStaticCaps aa_caps = GST_STATIC_CAPS ("audio/x-audible");
5743 
5744 #define AA_CAPS gst_static_caps_get(&aa_caps)
5745 
5746 static void
aa_type_find(GstTypeFind * tf,gpointer private)5747 aa_type_find (GstTypeFind * tf, gpointer private)
5748 {
5749   const guint8 *data;
5750 
5751   data = gst_type_find_peek (tf, 0, 12);
5752   if (data == NULL)
5753     return;
5754 
5755   if (GST_READ_UINT32_BE (data + 4) == 0x57907536) {
5756     guint64 media_len;
5757 
5758     media_len = gst_type_find_get_length (tf);
5759     if (media_len > 0 && GST_READ_UINT32_BE (data) == media_len)
5760       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, AA_CAPS);
5761     else
5762       gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, AA_CAPS);
5763   }
5764 }
5765 
5766 /*** generic typefind for streams that have some data at a specific position***/
5767 typedef struct
5768 {
5769   const guint8 *data;
5770   guint size;
5771   guint probability;
5772   GstCaps *caps;
5773 }
5774 GstTypeFindData;
5775 
5776 static void
start_with_type_find(GstTypeFind * tf,gpointer private)5777 start_with_type_find (GstTypeFind * tf, gpointer private)
5778 {
5779   GstTypeFindData *start_with = (GstTypeFindData *) private;
5780   const guint8 *data;
5781 
5782   GST_LOG ("trying to find mime type %s with the first %u bytes of data",
5783       gst_structure_get_name (gst_caps_get_structure (start_with->caps, 0)),
5784       start_with->size);
5785   data = gst_type_find_peek (tf, 0, start_with->size);
5786   if (data && memcmp (data, start_with->data, start_with->size) == 0) {
5787     gst_type_find_suggest (tf, start_with->probability, start_with->caps);
5788   }
5789 }
5790 
5791 static void
sw_data_destroy(GstTypeFindData * sw_data)5792 sw_data_destroy (GstTypeFindData * sw_data)
5793 {
5794   if (G_LIKELY (sw_data->caps != NULL))
5795     gst_caps_unref (sw_data->caps);
5796   g_slice_free (GstTypeFindData, sw_data);
5797 }
5798 
/* Registers start_with_type_find() under the given name: the typefinder
 * matches streams whose first _size bytes equal _data and then suggests caps
 * named after the registration with _probability.  The per-registration
 * data is freed via sw_data_destroy(), either by the registry's destroy
 * notify or right here if registration fails.
 *
 * Wrapped in G_STMT_START/G_STMT_END so the expansion is a single statement;
 * invoke it with a trailing semicolon. */
#define TYPE_FIND_REGISTER_START_WITH(plugin,name,rank,ext,_data,_size,_probability)\
G_STMT_START {                                                          \
  GstTypeFindData *sw_data = g_slice_new (GstTypeFindData);             \
  sw_data->data = (const guint8 *) (_data);                             \
  sw_data->size = (_size);                                              \
  sw_data->probability = (_probability);                                \
  sw_data->caps = gst_caps_new_empty_simple (name);                     \
  if (!gst_type_find_register (plugin, name, rank, start_with_type_find,\
                     ext, sw_data->caps, sw_data,                       \
                     (GDestroyNotify) (sw_data_destroy))) {             \
    sw_data_destroy (sw_data);                                          \
  }                                                                     \
} G_STMT_END
5812 
5813 /*** same for riff types ***/
5814 
5815 static void
riff_type_find(GstTypeFind * tf,gpointer private)5816 riff_type_find (GstTypeFind * tf, gpointer private)
5817 {
5818   GstTypeFindData *riff_data = (GstTypeFindData *) private;
5819   const guint8 *data = gst_type_find_peek (tf, 0, 12);
5820 
5821   if (data && (memcmp (data, "RIFF", 4) == 0 || memcmp (data, "AVF0", 4) == 0)) {
5822     data += 8;
5823     if (memcmp (data, riff_data->data, 4) == 0)
5824       gst_type_find_suggest (tf, riff_data->probability, riff_data->caps);
5825   }
5826 }
5827 
/* Registers riff_type_find() under the given name: the typefinder matches
 * RIFF-style streams whose 4-byte form type equals _data and suggests caps
 * named after the registration with maximum probability.  The
 * per-registration data is freed via sw_data_destroy(), either by the
 * registry's destroy notify or right here if registration fails.
 *
 * Wrapped in G_STMT_START/G_STMT_END so the expansion is a single statement;
 * invoke it with a trailing semicolon. */
#define TYPE_FIND_REGISTER_RIFF(plugin,name,rank,ext,_data)             \
G_STMT_START {                                                          \
  GstTypeFindData *sw_data = g_slice_new (GstTypeFindData);             \
  sw_data->data = (const guint8 *) (_data);                             \
  sw_data->size = 4;                                                    \
  sw_data->probability = GST_TYPE_FIND_MAXIMUM;                         \
  sw_data->caps = gst_caps_new_empty_simple (name);                     \
  if (!gst_type_find_register (plugin, name, rank, riff_type_find,      \
                      ext, sw_data->caps, sw_data,                      \
                      (GDestroyNotify) (sw_data_destroy))) {            \
    sw_data_destroy (sw_data);                                          \
  }                                                                     \
} G_STMT_END
5841 
5842 
5843 /*** plugin initialization ***/
5844 
/* Registers a typefind function and makes the *enclosing function* return
 * FALSE if gst_type_find_register() fails, so it may only be used inside a
 * function returning gboolean.
 *
 * Wrapped in G_STMT_START/G_STMT_END so the expansion is a single statement;
 * invoke it with a trailing semicolon. */
#define TYPE_FIND_REGISTER(plugin,name,rank,func,ext,caps,priv,notify) \
G_STMT_START {                                                          \
  if (!gst_type_find_register (plugin, name, rank, func, ext, caps, priv, notify)) \
    return FALSE;                                                       \
} G_STMT_END
5850 
5851 
5852 static gboolean
plugin_init(GstPlugin * plugin)5853 plugin_init (GstPlugin * plugin)
5854 {
5855   /* can't initialize this via a struct as caps can't be statically initialized */
5856 
5857   GST_DEBUG_CATEGORY_INIT (type_find_debug, "typefindfunctions",
5858       GST_DEBUG_FG_GREEN | GST_DEBUG_BG_RED, "generic type find functions");
5859 
5860   /* note: asx/wax/wmx are XML files, asf doesn't handle them */
5861   /* must use strings, macros don't accept initializers */
5862   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-ms-asf", GST_RANK_SECONDARY,
5863       "asf,wm,wma,wmv",
5864       "\060\046\262\165\216\146\317\021\246\331\000\252\000\142\316\154", 16,
5865       GST_TYPE_FIND_MAXIMUM);
5866   TYPE_FIND_REGISTER (plugin, "audio/x-musepack", GST_RANK_PRIMARY,
5867       musepack_type_find, "mpc,mpp,mp+", MUSEPACK_CAPS, NULL, NULL);
5868   TYPE_FIND_REGISTER (plugin, "audio/x-au", GST_RANK_MARGINAL,
5869       au_type_find, "au,snd", AU_CAPS, NULL, NULL);
5870   TYPE_FIND_REGISTER_RIFF (plugin, "video/x-msvideo", GST_RANK_PRIMARY,
5871       "avi", "AVI ");
5872   TYPE_FIND_REGISTER_RIFF (plugin, "audio/qcelp", GST_RANK_PRIMARY,
5873       "qcp", "QLCM");
5874   TYPE_FIND_REGISTER_RIFF (plugin, "video/x-cdxa", GST_RANK_PRIMARY,
5875       "dat", "CDXA");
5876   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-vcd", GST_RANK_PRIMARY,
5877       "dat", "\000\377\377\377\377\377\377\377\377\377\377\000", 12,
5878       GST_TYPE_FIND_MAXIMUM);
5879   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-imelody", GST_RANK_PRIMARY,
5880       "imy,ime,imelody", "BEGIN:IMELODY", 13, GST_TYPE_FIND_MAXIMUM);
5881   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-scc", GST_RANK_PRIMARY,
5882       "scc", "Scenarist_SCC V1.0", 18, GST_TYPE_FIND_MAXIMUM);
5883   TYPE_FIND_REGISTER (plugin, "application/x-mcc", GST_RANK_PRIMARY,
5884       mcc_type_find, "mcc", MCC_CAPS, NULL, NULL);
5885 
5886 #if 0
5887   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-smoke", GST_RANK_PRIMARY,
5888       NULL, "\x80smoke\x00\x01\x00", 6, GST_TYPE_FIND_MAXIMUM);
5889 #endif
5890   TYPE_FIND_REGISTER (plugin, "audio/midi", GST_RANK_PRIMARY, mid_type_find,
5891       "mid,midi", MID_CAPS, NULL, NULL);
5892   TYPE_FIND_REGISTER_RIFF (plugin, "audio/riff-midi", GST_RANK_PRIMARY,
5893       "mid,midi", "RMID");
5894   TYPE_FIND_REGISTER (plugin, "audio/mobile-xmf", GST_RANK_PRIMARY,
5895       mxmf_type_find, "mxmf", MXMF_CAPS, NULL, NULL);
5896   TYPE_FIND_REGISTER (plugin, "video/x-fli", GST_RANK_MARGINAL, flx_type_find,
5897       "flc,fli", FLX_CAPS, NULL, NULL);
5898   TYPE_FIND_REGISTER (plugin, "application/x-id3v2", GST_RANK_PRIMARY + 103,
5899       id3v2_type_find, "mp3,mp2,mp1,mpga,ogg,flac,tta", ID3_CAPS, NULL, NULL);
5900   TYPE_FIND_REGISTER (plugin, "application/x-id3v1", GST_RANK_PRIMARY + 101,
5901       id3v1_type_find, "mp3,mp2,mp1,mpga,ogg,flac,tta", ID3_CAPS, NULL, NULL);
5902   TYPE_FIND_REGISTER (plugin, "application/x-apetag", GST_RANK_PRIMARY + 102,
5903       apetag_type_find, "mp3,ape,mpc,wv", APETAG_CAPS, NULL, NULL);
5904   TYPE_FIND_REGISTER (plugin, "audio/x-ttafile", GST_RANK_PRIMARY,
5905       tta_type_find, "tta", TTA_CAPS, NULL, NULL);
5906   TYPE_FIND_REGISTER (plugin, "audio/x-mod", GST_RANK_SECONDARY, mod_type_find,
5907       "669,amf,ams,dbm,digi,dmf,dsm,gdm,far,imf,it,j2b,mdl,med,mod,mt2,mtm,"
5908       "okt,psm,ptm,sam,s3m,stm,stx,ult,umx,xm", MOD_CAPS, NULL, NULL);
5909   TYPE_FIND_REGISTER (plugin, "audio/mpeg", GST_RANK_PRIMARY, mp3_type_find,
5910       "mp3,mp2,mp1,mpga", MP3_CAPS, NULL, NULL);
5911   TYPE_FIND_REGISTER (plugin, "audio/x-ac3", GST_RANK_PRIMARY, ac3_type_find,
5912       "ac3,eac3", AC3_CAPS, NULL, NULL);
5913   TYPE_FIND_REGISTER (plugin, "audio/x-dts", GST_RANK_SECONDARY, dts_type_find,
5914       "dts", DTS_CAPS, NULL, NULL);
5915   TYPE_FIND_REGISTER (plugin, "audio/x-gsm", GST_RANK_PRIMARY, NULL, "gsm",
5916       GSM_CAPS, NULL, NULL);
5917   TYPE_FIND_REGISTER (plugin, "video/mpeg-sys", GST_RANK_PRIMARY,
5918       mpeg_sys_type_find, "mpe,mpeg,mpg", MPEG_SYS_CAPS, NULL, NULL);
5919   TYPE_FIND_REGISTER (plugin, "video/mpegts", GST_RANK_PRIMARY,
5920       mpeg_ts_type_find, "ts,mts", MPEGTS_CAPS, NULL, NULL);
5921   TYPE_FIND_REGISTER (plugin, "application/ogg", GST_RANK_PRIMARY,
5922       ogganx_type_find, "ogg,oga,ogv,ogm,ogx,spx,anx,axa,axv", OGG_CAPS,
5923       NULL, NULL);
5924   TYPE_FIND_REGISTER (plugin, "video/mpeg-elementary", GST_RANK_MARGINAL,
5925       mpeg_video_stream_type_find, "mpv,mpeg,mpg", MPEG_VIDEO_CAPS, NULL, NULL);
5926   TYPE_FIND_REGISTER (plugin, "video/mpeg4", GST_RANK_PRIMARY,
5927       mpeg4_video_type_find, "m4v", MPEG_VIDEO_CAPS, NULL, NULL);
5928   TYPE_FIND_REGISTER (plugin, "video/x-h263", GST_RANK_SECONDARY,
5929       h263_video_type_find, "h263,263", H263_VIDEO_CAPS, NULL, NULL);
5930   TYPE_FIND_REGISTER (plugin, "video/x-h264", GST_RANK_PRIMARY,
5931       h264_video_type_find, "h264,x264,264", H264_VIDEO_CAPS, NULL, NULL);
5932   TYPE_FIND_REGISTER (plugin, "video/x-h265", GST_RANK_PRIMARY,
5933       h265_video_type_find, "h265,x265,265", H265_VIDEO_CAPS, NULL, NULL);
5934   TYPE_FIND_REGISTER (plugin, "video/x-nuv", GST_RANK_SECONDARY, nuv_type_find,
5935       "nuv", NUV_CAPS, NULL, NULL);
5936 
5937   /* ISO formats */
5938   TYPE_FIND_REGISTER (plugin, "audio/x-m4a", GST_RANK_PRIMARY, m4a_type_find,
5939       "m4a", M4A_CAPS, NULL, NULL);
5940   TYPE_FIND_REGISTER (plugin, "application/x-3gp", GST_RANK_PRIMARY,
5941       q3gp_type_find, "3gp", Q3GP_CAPS, NULL, NULL);
5942   TYPE_FIND_REGISTER (plugin, "video/quicktime", GST_RANK_PRIMARY,
5943       qt_type_find, "mov,mp4", QT_CAPS, NULL, NULL);
5944   TYPE_FIND_REGISTER (plugin, "image/x-quicktime", GST_RANK_SECONDARY,
5945       qtif_type_find, "qif,qtif,qti", QTIF_CAPS, NULL, NULL);
5946   TYPE_FIND_REGISTER (plugin, "image/jp2", GST_RANK_PRIMARY,
5947       jp2_type_find, "jp2", JP2_CAPS, NULL, NULL);
5948   TYPE_FIND_REGISTER (plugin, "image/x-jpc", GST_RANK_PRIMARY,
5949       jpc_type_find, "jpc,j2k", JPC_CAPS, NULL, NULL);
5950   TYPE_FIND_REGISTER (plugin, "video/mj2", GST_RANK_PRIMARY,
5951       jp2_type_find, "mj2", MJ2_CAPS, NULL, NULL);
5952 
5953   TYPE_FIND_REGISTER (plugin, "text/html", GST_RANK_SECONDARY, html_type_find,
5954       "htm,html", HTML_CAPS, NULL, NULL);
5955   TYPE_FIND_REGISTER_START_WITH (plugin, "application/vnd.rn-realmedia",
5956       GST_RANK_SECONDARY, "ra,ram,rm,rmvb", ".RMF", 4, GST_TYPE_FIND_MAXIMUM);
5957   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-pn-realaudio",
5958       GST_RANK_SECONDARY, "ra,ram,rm,rmvb", ".ra\375", 4,
5959       GST_TYPE_FIND_MAXIMUM);
5960   TYPE_FIND_REGISTER (plugin, "application/x-shockwave-flash",
5961       GST_RANK_SECONDARY, swf_type_find, "swf,swfl", SWF_CAPS, NULL, NULL);
5962   TYPE_FIND_REGISTER (plugin, "application/xges",
5963       GST_RANK_PRIMARY, xges_type_find, "xges", XGES_CAPS, NULL, NULL);
5964   TYPE_FIND_REGISTER (plugin, "application/dash+xml",
5965       GST_RANK_PRIMARY, dash_mpd_type_find, "mpd,MPD", DASH_CAPS, NULL, NULL);
5966   TYPE_FIND_REGISTER (plugin, "application/vnd.ms-sstr+xml",
5967       GST_RANK_PRIMARY, mss_manifest_type_find, NULL, MSS_MANIFEST_CAPS, NULL,
5968       NULL);
5969   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-flv", GST_RANK_SECONDARY,
5970       "flv", "FLV", 3, GST_TYPE_FIND_MAXIMUM);
5971   TYPE_FIND_REGISTER (plugin, "text/plain", GST_RANK_MARGINAL, utf8_type_find,
5972       "txt", UTF8_CAPS, NULL, NULL);
5973   TYPE_FIND_REGISTER (plugin, "text/utf-16", GST_RANK_MARGINAL, utf16_type_find,
5974       "txt", UTF16_CAPS, NULL, NULL);
5975   TYPE_FIND_REGISTER (plugin, "text/utf-32", GST_RANK_MARGINAL, utf32_type_find,
5976       "txt", UTF32_CAPS, NULL, NULL);
5977   TYPE_FIND_REGISTER (plugin, "text/uri-list", GST_RANK_MARGINAL, uri_type_find,
5978       "ram", URI_CAPS, NULL, NULL);
5979   TYPE_FIND_REGISTER (plugin, "application/itc", GST_RANK_SECONDARY,
5980       itc_type_find, "itc", ITC_CAPS, NULL, NULL);
5981   TYPE_FIND_REGISTER (plugin, "application/x-hls", GST_RANK_MARGINAL,
5982       hls_type_find, "m3u8", HLS_CAPS, NULL, NULL);
5983   TYPE_FIND_REGISTER (plugin, "application/sdp", GST_RANK_SECONDARY,
5984       sdp_type_find, "sdp", SDP_CAPS, NULL, NULL);
5985   TYPE_FIND_REGISTER (plugin, "application/smil", GST_RANK_SECONDARY,
5986       smil_type_find, "smil", SMIL_CAPS, NULL, NULL);
5987   TYPE_FIND_REGISTER (plugin, "application/ttml+xml", GST_RANK_SECONDARY,
5988       ttml_xml_type_find, "ttml+xml", TTML_XML_CAPS, NULL, NULL);
5989   TYPE_FIND_REGISTER (plugin, "application/xml", GST_RANK_MARGINAL,
5990       xml_type_find, "xml", GENERIC_XML_CAPS, NULL, NULL);
5991   TYPE_FIND_REGISTER_RIFF (plugin, "audio/x-wav", GST_RANK_PRIMARY, "wav",
5992       "WAVE");
5993   TYPE_FIND_REGISTER (plugin, "audio/x-aiff", GST_RANK_SECONDARY,
5994       aiff_type_find, "aiff,aif,aifc", AIFF_CAPS, NULL, NULL);
5995   TYPE_FIND_REGISTER (plugin, "audio/x-svx", GST_RANK_SECONDARY, svx_type_find,
5996       "iff,svx", SVX_CAPS, NULL, NULL);
5997   TYPE_FIND_REGISTER (plugin, "audio/x-paris", GST_RANK_SECONDARY,
5998       paris_type_find, "paf", PARIS_CAPS, NULL, NULL);
5999   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-nist", GST_RANK_SECONDARY,
6000       "nist", "NIST", 4, GST_TYPE_FIND_MAXIMUM);
6001   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-voc", GST_RANK_SECONDARY,
6002       "voc", "Creative", 8, GST_TYPE_FIND_MAXIMUM);
6003   TYPE_FIND_REGISTER (plugin, "audio/x-sds", GST_RANK_SECONDARY, sds_type_find,
6004       "sds", SDS_CAPS, NULL, NULL);
6005   TYPE_FIND_REGISTER (plugin, "audio/x-ircam", GST_RANK_SECONDARY,
6006       ircam_type_find, "sf", IRCAM_CAPS, NULL, NULL);
6007   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-w64", GST_RANK_SECONDARY,
6008       "w64", "riff", 4, GST_TYPE_FIND_MAXIMUM);
6009   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-rf64", GST_RANK_PRIMARY,
6010       "rf64", "RF64", 4, GST_TYPE_FIND_MAXIMUM);
6011   TYPE_FIND_REGISTER (plugin, "audio/x-shorten", GST_RANK_SECONDARY,
6012       shn_type_find, "shn", SHN_CAPS, NULL, NULL);
6013   TYPE_FIND_REGISTER (plugin, "application/x-ape", GST_RANK_SECONDARY,
6014       ape_type_find, "ape", APE_CAPS, NULL, NULL);
6015   TYPE_FIND_REGISTER (plugin, "image/jpeg", GST_RANK_PRIMARY + 15,
6016       jpeg_type_find, "jpg,jpe,jpeg", JPEG_CAPS, NULL, NULL);
6017   TYPE_FIND_REGISTER_START_WITH (plugin, "image/gif", GST_RANK_PRIMARY, "gif",
6018       "GIF8", 4, GST_TYPE_FIND_MAXIMUM);
6019   TYPE_FIND_REGISTER_START_WITH (plugin, "image/png", GST_RANK_PRIMARY + 14,
6020       "png", "\211PNG\015\012\032\012", 8, GST_TYPE_FIND_MAXIMUM);
6021   TYPE_FIND_REGISTER (plugin, "image/bmp", GST_RANK_PRIMARY, bmp_type_find,
6022       "bmp", BMP_CAPS, NULL, NULL);
6023   TYPE_FIND_REGISTER (plugin, "image/tiff", GST_RANK_PRIMARY, tiff_type_find,
6024       "tif,tiff", TIFF_CAPS, NULL, NULL);
6025   TYPE_FIND_REGISTER_RIFF (plugin, "image/webp", GST_RANK_PRIMARY,
6026       "webp", "WEBP");
6027   TYPE_FIND_REGISTER (plugin, "image/x-exr", GST_RANK_PRIMARY, exr_type_find,
6028       "exr", EXR_CAPS, NULL, NULL);
6029   TYPE_FIND_REGISTER (plugin, "image/x-portable-pixmap", GST_RANK_SECONDARY,
6030       pnm_type_find, "pnm,ppm,pgm,pbm", PNM_CAPS, NULL, NULL);
6031   TYPE_FIND_REGISTER (plugin, "video/x-matroska", GST_RANK_PRIMARY,
6032       matroska_type_find, "mkv,mka,mk3d,webm", MATROSKA_CAPS, NULL, NULL);
6033   TYPE_FIND_REGISTER (plugin, "application/mxf", GST_RANK_PRIMARY,
6034       mxf_type_find, "mxf", MXF_CAPS, NULL, NULL);
6035   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-mve", GST_RANK_SECONDARY,
6036       "mve", "Interplay MVE File\032\000\032\000\000\001\063\021", 26,
6037       GST_TYPE_FIND_MAXIMUM);
6038   TYPE_FIND_REGISTER (plugin, "video/x-dv", GST_RANK_SECONDARY, dv_type_find,
6039       "dv,dif", DV_CAPS, NULL, NULL);
6040   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-amr-nb-sh", GST_RANK_PRIMARY,
6041       "amr", "#!AMR", 5, GST_TYPE_FIND_LIKELY);
6042   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-amr-wb-sh", GST_RANK_PRIMARY,
6043       "amr", "#!AMR-WB", 7, GST_TYPE_FIND_MAXIMUM);
6044   TYPE_FIND_REGISTER (plugin, "audio/iLBC-sh", GST_RANK_PRIMARY, ilbc_type_find,
6045       "ilbc", ILBC_CAPS, NULL, NULL);
6046   TYPE_FIND_REGISTER (plugin, "audio/x-sbc", GST_RANK_MARGINAL, sbc_type_find,
6047       "sbc", SBC_CAPS, NULL, NULL);
6048   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-sid", GST_RANK_MARGINAL,
6049       "sid", "PSID", 4, GST_TYPE_FIND_MAXIMUM);
6050   TYPE_FIND_REGISTER_START_WITH (plugin, "image/x-xcf", GST_RANK_SECONDARY,
6051       "xcf", "gimp xcf", 8, GST_TYPE_FIND_MAXIMUM);
6052   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-mng", GST_RANK_SECONDARY,
6053       "mng", "\212MNG\015\012\032\012", 8, GST_TYPE_FIND_MAXIMUM);
6054   TYPE_FIND_REGISTER_START_WITH (plugin, "image/x-jng", GST_RANK_SECONDARY,
6055       "jng", "\213JNG\015\012\032\012", 8, GST_TYPE_FIND_MAXIMUM);
6056   TYPE_FIND_REGISTER_START_WITH (plugin, "image/x-xpixmap", GST_RANK_SECONDARY,
6057       "xpm", "/* XPM */", 9, GST_TYPE_FIND_MAXIMUM);
6058   TYPE_FIND_REGISTER_START_WITH (plugin, "image/x-sun-raster",
6059       GST_RANK_SECONDARY, "ras", "\131\246\152\225", 4, GST_TYPE_FIND_MAXIMUM);
6060   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-bzip",
6061       GST_RANK_SECONDARY, "bz2", "BZh", 3, GST_TYPE_FIND_LIKELY);
6062   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-gzip",
6063       GST_RANK_SECONDARY, "gz", "\037\213", 2, GST_TYPE_FIND_LIKELY);
6064   TYPE_FIND_REGISTER_START_WITH (plugin, "application/zip", GST_RANK_SECONDARY,
6065       "zip", "PK\003\004", 4, GST_TYPE_FIND_LIKELY);
6066   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-compress",
6067       GST_RANK_SECONDARY, "Z", "\037\235", 2, GST_TYPE_FIND_LIKELY);
6068   TYPE_FIND_REGISTER (plugin, "subtitle/x-kate", GST_RANK_MARGINAL,
6069       kate_type_find, NULL, NULL, NULL, NULL);
6070   TYPE_FIND_REGISTER (plugin, "application/x-subtitle-vtt", GST_RANK_MARGINAL,
6071       webvtt_type_find, "vtt", WEBVTT_CAPS, NULL, NULL);
6072   TYPE_FIND_REGISTER (plugin, "audio/x-flac", GST_RANK_PRIMARY, flac_type_find,
6073       "flac", FLAC_CAPS, NULL, NULL);
6074   TYPE_FIND_REGISTER (plugin, "audio/x-vorbis", GST_RANK_PRIMARY,
6075       vorbis_type_find, NULL, VORBIS_CAPS, NULL, NULL);
6076   TYPE_FIND_REGISTER (plugin, "video/x-theora", GST_RANK_PRIMARY,
6077       theora_type_find, NULL, THEORA_CAPS, NULL, NULL);
6078   TYPE_FIND_REGISTER (plugin, "application/x-ogm-video", GST_RANK_PRIMARY,
6079       ogmvideo_type_find, NULL, OGMVIDEO_CAPS, NULL, NULL);
6080   TYPE_FIND_REGISTER (plugin, "application/x-ogm-audio", GST_RANK_PRIMARY,
6081       ogmaudio_type_find, NULL, OGMAUDIO_CAPS, NULL, NULL);
6082   TYPE_FIND_REGISTER (plugin, "application/x-ogm-text", GST_RANK_PRIMARY,
6083       ogmtext_type_find, NULL, OGMTEXT_CAPS, NULL, NULL);
6084   TYPE_FIND_REGISTER (plugin, "audio/x-speex", GST_RANK_PRIMARY,
6085       speex_type_find, NULL, SPEEX_CAPS, NULL, NULL);
6086   TYPE_FIND_REGISTER (plugin, "audio/x-celt", GST_RANK_PRIMARY, celt_type_find,
6087       NULL, CELT_CAPS, NULL, NULL);
6088   TYPE_FIND_REGISTER (plugin, "application/x-ogg-skeleton", GST_RANK_PRIMARY,
6089       oggskel_type_find, NULL, OGG_SKELETON_CAPS, NULL, NULL);
6090   TYPE_FIND_REGISTER (plugin, "text/x-cmml", GST_RANK_PRIMARY, cmml_type_find,
6091       NULL, CMML_CAPS, NULL, NULL);
6092   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-executable",
6093       GST_RANK_MARGINAL, NULL, "\177ELF", 4, GST_TYPE_FIND_MAXIMUM);
6094   TYPE_FIND_REGISTER (plugin, "audio/aac", GST_RANK_SECONDARY, aac_type_find,
6095       "aac,adts,adif,loas", AAC_CAPS, NULL, NULL);
6096   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-spc", GST_RANK_SECONDARY,
6097       "spc", "SNES-SPC700 Sound File Data", 27, GST_TYPE_FIND_MAXIMUM);
6098   TYPE_FIND_REGISTER (plugin, "audio/x-wavpack", GST_RANK_SECONDARY,
6099       wavpack_type_find, "wv,wvp", WAVPACK_CAPS, NULL, NULL);
6100   TYPE_FIND_REGISTER (plugin, "audio/x-wavpack-correction", GST_RANK_SECONDARY,
6101       wavpack_type_find, "wvc", WAVPACK_CORRECTION_CAPS, NULL, NULL);
6102   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-caf", GST_RANK_SECONDARY,
6103       "caf", "caff\000\001", 6, GST_TYPE_FIND_MAXIMUM);
6104   TYPE_FIND_REGISTER (plugin, "application/postscript", GST_RANK_SECONDARY,
6105       postscript_type_find, "ps", POSTSCRIPT_CAPS, NULL, NULL);
6106   TYPE_FIND_REGISTER (plugin, "image/svg+xml", GST_RANK_SECONDARY,
6107       svg_type_find, "svg", SVG_CAPS, NULL, NULL);
6108   TYPE_FIND_REGISTER_START_WITH (plugin, "application/x-rar",
6109       GST_RANK_SECONDARY, "rar", "Rar!", 4, GST_TYPE_FIND_LIKELY);
6110   TYPE_FIND_REGISTER (plugin, "application/x-tar", GST_RANK_SECONDARY,
6111       tar_type_find, "tar", TAR_CAPS, NULL, NULL);
6112   TYPE_FIND_REGISTER (plugin, "application/x-ar", GST_RANK_SECONDARY,
6113       ar_type_find, "a", AR_CAPS, NULL, NULL);
6114   TYPE_FIND_REGISTER (plugin, "application/x-ms-dos-executable",
6115       GST_RANK_SECONDARY, msdos_type_find, "dll,exe,ocx,sys,scr,msstyles,cpl",
6116       MSDOS_CAPS, NULL, NULL);
6117   TYPE_FIND_REGISTER (plugin, "video/x-dirac", GST_RANK_PRIMARY,
6118       dirac_type_find, NULL, DIRAC_CAPS, NULL, NULL);
6119   TYPE_FIND_REGISTER (plugin, "multipart/x-mixed-replace", GST_RANK_SECONDARY,
6120       multipart_type_find, NULL, MULTIPART_CAPS, NULL, NULL);
6121   TYPE_FIND_REGISTER (plugin, "application/x-mmsh", GST_RANK_SECONDARY,
6122       mmsh_type_find, NULL, MMSH_CAPS, NULL, NULL);
6123   TYPE_FIND_REGISTER (plugin, "video/vivo", GST_RANK_SECONDARY, vivo_type_find,
6124       "viv", VIVO_CAPS, NULL, NULL);
6125   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-nsf", GST_RANK_SECONDARY,
6126       "nsf", "NESM\x1a", 5, GST_TYPE_FIND_MAXIMUM);
6127   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-gym", GST_RANK_SECONDARY,
6128       "gym", "GYMX", 4, GST_TYPE_FIND_MAXIMUM);
6129   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-ay", GST_RANK_SECONDARY, "ay",
6130       "ZXAYEMUL", 8, GST_TYPE_FIND_MAXIMUM);
6131   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-gbs", GST_RANK_SECONDARY,
6132       "gbs", "GBS\x01", 4, GST_TYPE_FIND_MAXIMUM);
6133   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-vgm", GST_RANK_SECONDARY,
6134       "vgm", "Vgm\x20", 4, GST_TYPE_FIND_MAXIMUM);
6135   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-sap", GST_RANK_SECONDARY,
6136       "sap", "SAP\x0d\x0a" "AUTHOR\x20", 12, GST_TYPE_FIND_MAXIMUM);
6137   TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-ivf", GST_RANK_SECONDARY,
6138       "ivf", "DKIF", 4, GST_TYPE_FIND_NEARLY_CERTAIN);
6139   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-kss", GST_RANK_SECONDARY,
6140       "kss", "KSSX\0", 5, GST_TYPE_FIND_MAXIMUM);
6141   TYPE_FIND_REGISTER_START_WITH (plugin, "application/pdf", GST_RANK_SECONDARY,
6142       "pdf", "%PDF-", 5, GST_TYPE_FIND_LIKELY);
6143   TYPE_FIND_REGISTER_START_WITH (plugin, "application/msword",
6144       GST_RANK_SECONDARY, "doc", "\320\317\021\340\241\261\032\341", 8,
6145       GST_TYPE_FIND_LIKELY);
6146   /* Mac OS X .DS_Store files tend to be taken for video/mpeg */
6147   TYPE_FIND_REGISTER_START_WITH (plugin, "application/octet-stream",
6148       GST_RANK_SECONDARY, "DS_Store", "\000\000\000\001Bud1", 8,
6149       GST_TYPE_FIND_LIKELY);
6150   TYPE_FIND_REGISTER_START_WITH (plugin, "image/vnd.adobe.photoshop",
6151       GST_RANK_SECONDARY, "psd", "8BPS\000\001\000\000\000\000", 10,
6152       GST_TYPE_FIND_LIKELY);
6153   TYPE_FIND_REGISTER (plugin, "image/vnd.wap.wbmp", GST_RANK_MARGINAL,
6154       wbmp_typefind, NULL, NULL, NULL, NULL);
6155   TYPE_FIND_REGISTER (plugin, "application/x-yuv4mpeg", GST_RANK_SECONDARY,
6156       y4m_typefind, NULL, NULL, NULL, NULL);
6157   TYPE_FIND_REGISTER (plugin, "image/x-icon", GST_RANK_MARGINAL,
6158       windows_icon_typefind, NULL, NULL, NULL, NULL);
6159 
6160 #ifdef USE_GIO
6161   TYPE_FIND_REGISTER (plugin, "xdgmime-base", GST_RANK_MARGINAL,
6162       xdgmime_typefind, NULL, NULL, NULL, NULL);
6163 #endif
6164 
6165   TYPE_FIND_REGISTER (plugin, "image/x-degas", GST_RANK_MARGINAL,
6166       degas_type_find, NULL, NULL, NULL, NULL);
6167   TYPE_FIND_REGISTER (plugin, "application/octet-stream", GST_RANK_MARGINAL,
6168       dvdiso_type_find, NULL, NULL, NULL, NULL);
6169 
6170   TYPE_FIND_REGISTER (plugin, "application/x-ssa", GST_RANK_SECONDARY,
6171       ssa_type_find, "ssa,ass", NULL, NULL, NULL);
6172 
6173   TYPE_FIND_REGISTER (plugin, "video/x-pva", GST_RANK_SECONDARY,
6174       pva_type_find, "pva", PVA_CAPS, NULL, NULL);
6175 
6176   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-xi", GST_RANK_SECONDARY,
6177       "xi", "Extended Instrument: ", 21, GST_TYPE_FIND_MAXIMUM);
6178 
6179   TYPE_FIND_REGISTER (plugin, "audio/audible", GST_RANK_MARGINAL,
6180       aa_type_find, "aa,aax", AA_CAPS, NULL, NULL);
6181 
6182   TYPE_FIND_REGISTER (plugin, "audio/x-tap-tap", GST_RANK_PRIMARY,
6183       tap_type_find, "tap", TAP_CAPS, NULL, NULL);
6184   TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-tap-dmp",
6185       GST_RANK_SECONDARY, "dmp", "DC2N-TAP-RAW", 12, GST_TYPE_FIND_LIKELY);
6186 
6187   return TRUE;
6188 }
6189 
/* Plugin definition for this file.
 *
 * Declares the plugin under the name `typefindfunctions`, with the
 * description "default typefind functions", and hands plugin_init to
 * GStreamer as the plugin's init function; plugin_init is where all of the
 * TYPE_FIND_REGISTER / TYPE_FIND_REGISTER_START_WITH calls are made.
 *
 * GST_VERSION_MAJOR and GST_VERSION_MINOR are passed as the first two
 * arguments.  VERSION, GST_LICENSE, GST_PACKAGE_NAME and GST_PACKAGE_ORIGIN
 * are not defined in the part of the file visible here; presumably they are
 * supplied by config.h or by the build system -- TODO confirm.
 */
6190 GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
6191     GST_VERSION_MINOR,
6192     typefindfunctions,
6193     "default typefind functions",
6194     plugin_init, VERSION, GST_LICENSE, GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)
6195