• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer
2  * Copyright (C) 2003 Benjamin Otte <in7y118@public.uni-hamburg.de>
3  * Copyright (C) 2005-2009 Tim-Philipp Müller <tim centricular net>
4  * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
5  *
6  * gsttypefindfunctions.c: collection of various typefind functions
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public
19  * License along with this library; if not, write to the
20  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #include <glib.h>
29 #include <glib/gprintf.h>
30 
31 /* don't want to add gio xdgmime typefinder if gio was disabled via configure */
32 #ifdef HAVE_GIO
33 #include <gio/gio.h>
34 #define USE_GIO
35 #endif
36 
37 #include <gst/gst.h>
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 
43 #include <gst/pbutils/pbutils.h>
44 #include <gst/base/gstbytereader.h>
45 
46 #include "gsttypefindfunctionsplugin.h"
47 
48 /* DataScanCtx: helper for typefind functions that scan through data
49  * step-by-step, to avoid doing a peek at each and every offset */
50 
51 #define DATA_SCAN_CTX_CHUNK_SIZE 4096
52 
53 typedef struct
54 {
55   guint64 offset;
56   const guint8 *data;
57   guint size;
58 } DataScanCtx;
59 
60 static inline void
data_scan_ctx_advance(GstTypeFind * tf,DataScanCtx * c,guint bytes_to_skip)61 data_scan_ctx_advance (GstTypeFind * tf, DataScanCtx * c, guint bytes_to_skip)
62 {
63   c->offset += bytes_to_skip;
64   if (G_LIKELY (c->size > bytes_to_skip)) {
65     c->size -= bytes_to_skip;
66     c->data += bytes_to_skip;
67   } else {
68     c->data += c->size;
69     c->size = 0;
70   }
71 }
72 
73 static inline gboolean
data_scan_ctx_ensure_data(GstTypeFind * tf,DataScanCtx * c,guint min_len)74 data_scan_ctx_ensure_data (GstTypeFind * tf, DataScanCtx * c, guint min_len)
75 {
76   const guint8 *data;
77   guint64 len;
78   guint chunk_len = MAX (DATA_SCAN_CTX_CHUNK_SIZE, min_len);
79 
80   if (G_LIKELY (c->size >= min_len))
81     return TRUE;
82 
83   data = gst_type_find_peek (tf, c->offset, chunk_len);
84   if (G_LIKELY (data != NULL)) {
85     c->data = data;
86     c->size = chunk_len;
87     return TRUE;
88   }
89 
90   /* if there's less than our chunk size, try to get as much as we can, but
91    * always at least min_len bytes (we might be typefinding the first buffer
92    * of the stream and not have as much data available as we'd like) */
93   len = gst_type_find_get_length (tf);
94   if (len > 0) {
95     len = CLAMP (len - c->offset, min_len, chunk_len);
96   } else {
97     len = min_len;
98   }
99 
100   data = gst_type_find_peek (tf, c->offset, len);
101   if (data != NULL) {
102     c->data = data;
103     c->size = len;
104     return TRUE;
105   }
106 
107   return FALSE;
108 }
109 
110 static inline gboolean
data_scan_ctx_memcmp(GstTypeFind * tf,DataScanCtx * c,guint offset,const gchar * data,guint len)111 data_scan_ctx_memcmp (GstTypeFind * tf, DataScanCtx * c, guint offset,
112     const gchar * data, guint len)
113 {
114   if (G_UNLIKELY (offset + len >= G_MAXUINT32))
115     return FALSE;
116 
117   if (!data_scan_ctx_ensure_data (tf, c, offset + len))
118     return FALSE;
119 
120   return (memcmp (c->data + offset, data, len) == 0);
121 }
122 
123 /*** text/plain ***/
124 static gboolean xml_check_first_element (GstTypeFind * tf,
125     const gchar * element, guint elen, gboolean strict);
126 static gboolean sdp_check_header (GstTypeFind * tf);
127 
128 static GstStaticCaps utf8_caps = GST_STATIC_CAPS ("text/plain");
129 
130 #define UTF8_CAPS gst_static_caps_get(&utf8_caps)
131 
132 static gboolean
utf8_type_find_have_valid_utf8_at_offset(GstTypeFind * tf,guint64 offset,GstTypeFindProbability * prob)133 utf8_type_find_have_valid_utf8_at_offset (GstTypeFind * tf, guint64 offset,
134     GstTypeFindProbability * prob)
135 {
136   const guint8 *data;
137 
138   /* randomly decided values */
139   guint min_size = 16;          /* minimum size  */
140   guint size = 32 * 1024;       /* starting size */
141   guint probability = 95;       /* starting probability */
142   guint step = 10;              /* how much we reduce probability in each
143                                  * iteration */
144 
145   while (probability > step && size > min_size) {
146     data = gst_type_find_peek (tf, offset, size);
147     if (data) {
148       gchar *end;
149       gchar *start = (gchar *) data;
150 
151       if (g_utf8_validate (start, size, (const gchar **) &end) || (end - start + 4 > size)) {   /* allow last char to be cut off */
152         *prob = probability;
153         return TRUE;
154       }
155       *prob = 0;
156       return FALSE;
157     }
158     size /= 2;
159     probability -= step;
160   }
161   *prob = 0;
162   return FALSE;
163 }
164 
165 static void
utf8_type_find(GstTypeFind * tf,gpointer unused)166 utf8_type_find (GstTypeFind * tf, gpointer unused)
167 {
168   GstTypeFindProbability start_prob, mid_prob;
169   guint64 length;
170 
171   /* leave xml to the xml typefinders */
172   if (xml_check_first_element (tf, "", 0, TRUE))
173     return;
174 
175   /* leave sdp to the sdp typefinders */
176   if (sdp_check_header (tf))
177     return;
178 
179   /* check beginning of stream */
180   if (!utf8_type_find_have_valid_utf8_at_offset (tf, 0, &start_prob))
181     return;
182 
183   GST_LOG ("start is plain text with probability of %u", start_prob);
184 
185   /* POSSIBLE is the highest probability we ever return if we can't
186    * probe into the middle of the file and don't know its length */
187 
188   length = gst_type_find_get_length (tf);
189   if (length == 0 || length == (guint64) - 1) {
190     gst_type_find_suggest (tf, MIN (start_prob, GST_TYPE_FIND_POSSIBLE),
191         UTF8_CAPS);
192     return;
193   }
194 
195   if (length < 64 * 1024) {
196     gst_type_find_suggest (tf, start_prob, UTF8_CAPS);
197     return;
198   }
199 
200   /* check middle of stream */
201   if (!utf8_type_find_have_valid_utf8_at_offset (tf, length / 2, &mid_prob))
202     return;
203 
204   GST_LOG ("middle is plain text with probability of %u", mid_prob);
205   gst_type_find_suggest (tf, (start_prob + mid_prob) / 2, UTF8_CAPS);
206 }
207 
208 /*** text/utf-16 and text/utf-32} ***/
209 /* While UTF-8 is unicode too, using text/plain for UTF-16 and UTF-32
210    is going to break stuff. */
211 
212 typedef struct
213 {
214   size_t bomlen;
215   const char *const bom;
216     gboolean (*checker) (const guint8 *, gint, gint);
217   int boost;
218   int endianness;
219 } GstUnicodeTester;
220 
221 static gboolean
check_utf16(const guint8 * data,gint len,gint endianness)222 check_utf16 (const guint8 * data, gint len, gint endianness)
223 {
224   GstByteReader br;
225   guint16 high, low;
226 
227   low = high = 0;
228 
229   if (len & 1)
230     return FALSE;
231 
232   gst_byte_reader_init (&br, data, len);
233   while (len >= 2) {
234     /* test first for a single 16 bit value in the BMP */
235     if (endianness == G_BIG_ENDIAN)
236       high = gst_byte_reader_get_uint16_be_unchecked (&br);
237     else
238       high = gst_byte_reader_get_uint16_le_unchecked (&br);
239     if (high >= 0xD800 && high <= 0xDBFF) {
240       /* start of a surrogate pair */
241       if (len < 4)
242         return FALSE;
243       len -= 2;
244       if (endianness == G_BIG_ENDIAN)
245         low = gst_byte_reader_get_uint16_be_unchecked (&br);
246       else
247         low = gst_byte_reader_get_uint16_le_unchecked (&br);
248       if (low >= 0xDC00 && low <= 0xDFFF) {
249         /* second half of the surrogate pair */
250       } else
251         return FALSE;
252     } else {
253       if (high >= 0xDC00 && high <= 0xDFFF)
254         return FALSE;
255     }
256     len -= 2;
257   }
258   return TRUE;
259 }
260 
261 static gboolean
check_utf32(const guint8 * data,gint len,gint endianness)262 check_utf32 (const guint8 * data, gint len, gint endianness)
263 {
264   if (len & 3)
265     return FALSE;
266   while (len > 3) {
267     guint32 v;
268     if (endianness == G_BIG_ENDIAN)
269       v = GST_READ_UINT32_BE (data);
270     else
271       v = GST_READ_UINT32_LE (data);
272     if (v >= 0x10FFFF)
273       return FALSE;
274     data += 4;
275     len -= 4;
276   }
277   return TRUE;
278 }
279 
280 static void
unicode_type_find(GstTypeFind * tf,const GstUnicodeTester * tester,guint n_tester,const char * media_type,gboolean require_bom)281 unicode_type_find (GstTypeFind * tf, const GstUnicodeTester * tester,
282     guint n_tester, const char *media_type, gboolean require_bom)
283 {
284   gsize n;
285   gsize len = 4;
286   const guint8 *data = gst_type_find_peek (tf, 0, len);
287   int prob = -1;
288   const gint max_scan_size = 256 * 1024;
289   int endianness = 0;
290 
291   if (!data) {
292     len = 2;
293     data = gst_type_find_peek (tf, 0, len);
294     if (!data)
295       return;
296   }
297 
298   /* find a large enough size that works */
299   while (len < max_scan_size) {
300     size_t newlen = len << 1;
301     const guint8 *newdata = gst_type_find_peek (tf, 0, newlen);
302     if (!newdata)
303       break;
304     len = newlen;
305     data = newdata;
306   }
307 
308   for (n = 0; n < n_tester; ++n) {
309     int bom_boost = 0, tmpprob;
310     if (len >= tester[n].bomlen) {
311       if (!memcmp (data, tester[n].bom, tester[n].bomlen))
312         bom_boost = tester[n].boost;
313     }
314     if (require_bom && bom_boost == 0)
315       continue;
316     if (!(*tester[n].checker) (data, len, tester[n].endianness))
317       continue;
318     tmpprob = GST_TYPE_FIND_POSSIBLE - 20 + bom_boost;
319     if (tmpprob > prob) {
320       prob = tmpprob;
321       endianness = tester[n].endianness;
322     }
323   }
324 
325   if (prob > 0) {
326     GST_DEBUG ("This is valid %s %s", media_type,
327         endianness == G_BIG_ENDIAN ? "be" : "le");
328     gst_type_find_suggest_simple (tf, prob, media_type,
329         "endianness", G_TYPE_INT, endianness, NULL);
330   }
331 }
332 
333 static GstStaticCaps utf16_caps = GST_STATIC_CAPS ("text/utf-16");
334 
335 #define UTF16_CAPS gst_static_caps_get(&utf16_caps)
336 
337 static void
utf16_type_find(GstTypeFind * tf,gpointer unused)338 utf16_type_find (GstTypeFind * tf, gpointer unused)
339 {
340   static const GstUnicodeTester utf16tester[2] = {
341     {2, "\xff\xfe", check_utf16, 10, G_LITTLE_ENDIAN},
342     {2, "\xfe\xff", check_utf16, 20, G_BIG_ENDIAN},
343   };
344   unicode_type_find (tf, utf16tester, G_N_ELEMENTS (utf16tester),
345       "text/utf-16", TRUE);
346 }
347 
348 static GstStaticCaps utf32_caps = GST_STATIC_CAPS ("text/utf-32");
349 
350 #define UTF32_CAPS gst_static_caps_get(&utf32_caps)
351 
352 static void
utf32_type_find(GstTypeFind * tf,gpointer unused)353 utf32_type_find (GstTypeFind * tf, gpointer unused)
354 {
355   static const GstUnicodeTester utf32tester[2] = {
356     {4, "\xff\xfe\x00\x00", check_utf32, 10, G_LITTLE_ENDIAN},
357     {4, "\x00\x00\xfe\xff", check_utf32, 20, G_BIG_ENDIAN}
358   };
359   unicode_type_find (tf, utf32tester, G_N_ELEMENTS (utf32tester),
360       "text/utf-32", TRUE);
361 }
362 
363 /*** text/uri-list ***/
364 
365 static GstStaticCaps uri_caps = GST_STATIC_CAPS ("text/uri-list");
366 
367 #define URI_CAPS (gst_static_caps_get(&uri_caps))
368 #define BUFFER_SIZE 16          /* If the string is < 16 bytes we're screwed */
369 #define INC_BUFFER {                                                    \
370   pos++;                                                                \
371   if (pos == BUFFER_SIZE) {                                             \
372     pos = 0;                                                            \
373     offset += BUFFER_SIZE;                                              \
374     data = gst_type_find_peek (tf, offset, BUFFER_SIZE);                \
375     if (data == NULL) return;                                           \
376   } else {                                                              \
377     data++;                                                             \
378   }                                                                     \
379 }
380 static void
uri_type_find(GstTypeFind * tf,gpointer unused)381 uri_type_find (GstTypeFind * tf, gpointer unused)
382 {
383   const guint8 *data = gst_type_find_peek (tf, 0, BUFFER_SIZE);
384   guint pos = 0;
385   guint offset = 0;
386 
387   if (data) {
388     /* Search for # comment lines */
389     while (*data == '#') {
390       /* Goto end of line */
391       while (*data != '\n') {
392         INC_BUFFER;
393       }
394 
395       INC_BUFFER;
396     }
397 
398     if (!g_ascii_isalpha (*data)) {
399       /* Had a non alpha char - can't be uri-list */
400       return;
401     }
402 
403     INC_BUFFER;
404 
405     while (g_ascii_isalnum (*data)) {
406       INC_BUFFER;
407     }
408 
409     if (*data != ':') {
410       /* First non alpha char is not a : */
411       return;
412     }
413 
414     /* Get the next 2 bytes as well */
415     data = gst_type_find_peek (tf, offset + pos, 3);
416     if (data == NULL)
417       return;
418 
419     if (data[1] != '/' && data[2] != '/') {
420       return;
421     }
422 
423     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, URI_CAPS);
424   }
425 }
426 
427 /*** application/itc ***/
428 static GstStaticCaps itc_caps = GST_STATIC_CAPS ("application/itc");
429 #define ITC_CAPS (gst_static_caps_get(&itc_caps))
430 
431 static void
itc_type_find(GstTypeFind * tf,gpointer unused)432 itc_type_find (GstTypeFind * tf, gpointer unused)
433 {
434   DataScanCtx c = { 0, NULL, 0 };
435   guint8 magic[8] = { 0x00, 0x00, 0x01, 0x1C, 0x69, 0x74, 0x63, 0x68 };
436   guint8 preamble[4] = { 0x00, 0x00, 0x00, 0x02 };
437   guint8 artwork_marker[8] = { 0x00, 0x00, 0x00, 0x00, 0x61, 0x72, 0x74, 0x77 };
438   guint8 item_marker[4] = { 0x69, 0x74, 0x65, 0x6D };
439   GstTypeFindProbability itc_prob = GST_TYPE_FIND_NONE;
440   int i;
441 
442   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
443     return;
444 
445   if (memcmp (c.data, magic, 8))
446     return;
447 
448   /* At least we found the right magic */
449   itc_prob = GST_TYPE_FIND_MINIMUM;
450   data_scan_ctx_advance (tf, &c, 8);
451 
452   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 12)))
453     goto done;
454 
455   /* Check preamble 3 consecutive times */
456   for (i = 0; i < 3; i++) {
457     if (memcmp (c.data, preamble, 4))
458       goto done;
459     data_scan_ctx_advance (tf, &c, 4);
460   }
461 
462   itc_prob = GST_TYPE_FIND_POSSIBLE;
463 
464   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
465     goto done;
466 
467   if (memcmp (c.data, artwork_marker, 8))
468     goto done;
469 
470   itc_prob = GST_TYPE_FIND_LIKELY;
471   data_scan_ctx_advance (tf, &c, 8);
472 
473   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 256)))
474     goto done;
475 
476   /* ...and 256 0x00 padding bytes on what looks like the header's end */
477   for (i = 0; i < 256; i++) {
478     if (c.data[i])
479       goto done;
480   }
481 
482   itc_prob = GST_TYPE_FIND_NEARLY_CERTAIN;
483   data_scan_ctx_advance (tf, &c, 256);
484 
485   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 8)))
486     goto done;
487 
488   if (memcmp (c.data + 4, item_marker, 4))
489     goto done;
490 
491   itc_prob = GST_TYPE_FIND_MAXIMUM;
492 
493 done:
494   gst_type_find_suggest (tf, itc_prob, ITC_CAPS);
495 }
496 
497 /*** application/x-hls ***/
498 
499 static GstStaticCaps hls_caps = GST_STATIC_CAPS ("application/x-hls");
500 #define HLS_CAPS (gst_static_caps_get(&hls_caps))
501 
502 /* See http://tools.ietf.org/html/draft-pantos-http-live-streaming-05 */
503 static void
hls_type_find(GstTypeFind * tf,gpointer unused)504 hls_type_find (GstTypeFind * tf, gpointer unused)
505 {
506   DataScanCtx c = { 0, NULL, 0 };
507 
508   /* Minimum useful size is #EXTM3U\n + 1 tag + ':' = 30 bytes */
509   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 30)))
510     return;
511 
512   if (memcmp (c.data, "#EXTM3U", 7))
513     return;
514 
515   data_scan_ctx_advance (tf, &c, 7);
516 
517   /* Check only the first 4KB */
518   while (c.offset < 4096) {
519     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 21)))
520       return;
521 
522     /* Search for # comment lines */
523     if (c.data[0] == '#' && (memcmp (c.data, "#EXT-X-TARGETDURATION", 21) == 0
524             || memcmp (c.data, "#EXT-X-STREAM-INF", 17) == 0
525             || memcmp (c.data, "#EXT-X-MEDIA", 12) == 0)) {
526       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HLS_CAPS);
527       return;
528     }
529 
530     data_scan_ctx_advance (tf, &c, 1);
531   }
532 }
533 
534 
535 /*** application/xml **********************************************************/
536 
537 #define XML_BUFFER_SIZE 16
538 #define XML_INC_BUFFER {                                                \
539   pos++;                                                                \
540   if (pos == XML_BUFFER_SIZE) {                                         \
541     pos = 0;                                                            \
542     offset += XML_BUFFER_SIZE;                                          \
543     data = gst_type_find_peek (tf, offset, XML_BUFFER_SIZE);            \
544     if (data == NULL) return FALSE;                                     \
545   } else {                                                              \
546     data++;                                                             \
547   }                                                                     \
548 }
549 
550 #define XML_INC_BUFFER_DATA {                                           \
551   pos++;                                                                \
552   if (pos >= length) {                                                  \
553     return FALSE;                                                       \
554   } else {                                                              \
555     data++;                                                             \
556   }                                                                     \
557 }
558 
559 static gboolean
xml_check_first_element_from_data(const guint8 * data,guint length,const gchar * element,guint elen,gboolean strict)560 xml_check_first_element_from_data (const guint8 * data, guint length,
561     const gchar * element, guint elen, gboolean strict)
562 {
563   gboolean got_xmldec;
564   guint pos = 0;
565 
566   g_return_val_if_fail (data != NULL, FALSE);
567 
568   if (length <= 5)
569     return FALSE;
570 
571   /* look for the XMLDec
572    * see XML spec 2.8, Prolog and Document Type Declaration
573    * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
574   got_xmldec = (memcmp (data, "<?xml", 5) == 0);
575 
576   if (strict && !got_xmldec)
577     return FALSE;
578 
579   /* skip XMLDec in any case if we've got one */
580   if (got_xmldec) {
581     pos += 5;
582     data += 5;
583   }
584 
585   /* look for the first element, it has to be the requested element. Bail
586    * out if it is not within the first 4kB. */
587   while (pos < MIN (4096, length)) {
588     while (*data != '<' && pos < MIN (4096, length)) {
589       XML_INC_BUFFER_DATA;
590     }
591 
592     XML_INC_BUFFER_DATA;
593     if (!g_ascii_isalpha (*data)) {
594       /* if not alphabetic, it's a PI or an element / attribute declaration
595        * like <?xxx or <!xxx */
596       XML_INC_BUFFER_DATA;
597       continue;
598     }
599 
600     /* the first normal element, check if it's the one asked for */
601     if (pos + elen + 1 >= length)
602       return FALSE;
603     return (element && strncmp ((const char *) data, element, elen) == 0);
604   }
605 
606   return FALSE;
607 }
608 
609 static gboolean
xml_check_first_element(GstTypeFind * tf,const gchar * element,guint elen,gboolean strict)610 xml_check_first_element (GstTypeFind * tf, const gchar * element, guint elen,
611     gboolean strict)
612 {
613   gboolean got_xmldec;
614   const guint8 *data;
615   guint offset = 0;
616   guint pos = 0;
617 
618   data = gst_type_find_peek (tf, 0, XML_BUFFER_SIZE);
619   if (!data)
620     return FALSE;
621 
622   /* look for the XMLDec
623    * see XML spec 2.8, Prolog and Document Type Declaration
624    * http://www.w3.org/TR/2004/REC-xml-20040204/#sec-prolog-dtd */
625   got_xmldec = (memcmp (data, "<?xml", 5) == 0);
626 
627   if (strict && !got_xmldec)
628     return FALSE;
629 
630   /* skip XMLDec in any case if we've got one */
631   if (got_xmldec) {
632     pos += 5;
633     data += 5;
634   }
635 
636   /* look for the first element, it has to be the requested element. Bail
637    * out if it is not within the first 4kB. */
638   while (data && (offset + pos) < 4096) {
639     while (*data != '<' && (offset + pos) < 4096) {
640       XML_INC_BUFFER;
641     }
642 
643     XML_INC_BUFFER;
644     if (!g_ascii_isalpha (*data)) {
645       /* if not alphabetic, it's a PI or an element / attribute declaration
646        * like <?xxx or <!xxx */
647       XML_INC_BUFFER;
648       continue;
649     }
650 
651     /* the first normal element, check if it's the one asked for */
652     data = gst_type_find_peek (tf, offset + pos, elen + 1);
653     return (data && element && strncmp ((char *) data, element, elen) == 0);
654   }
655 
656   return FALSE;
657 }
658 
659 static GstStaticCaps generic_xml_caps = GST_STATIC_CAPS ("application/xml");
660 
661 #define GENERIC_XML_CAPS (gst_static_caps_get(&generic_xml_caps))
662 static void
xml_type_find(GstTypeFind * tf,gpointer unused)663 xml_type_find (GstTypeFind * tf, gpointer unused)
664 {
665   if (xml_check_first_element (tf, "", 0, TRUE)) {
666     gst_type_find_suggest (tf, GST_TYPE_FIND_MINIMUM, GENERIC_XML_CAPS);
667   }
668 }
669 
670 /*** application/dash+xml ****************************************************/
671 
672 static GstStaticCaps dash_caps = GST_STATIC_CAPS ("application/dash+xml");
673 
674 #define DASH_CAPS gst_static_caps_get (&dash_caps)
675 
676 static void
dash_mpd_type_find(GstTypeFind * tf,gpointer unused)677 dash_mpd_type_find (GstTypeFind * tf, gpointer unused)
678 {
679   if (xml_check_first_element (tf, "MPD", 3, FALSE) ||
680       xml_check_first_element (tf, "mpd", 3, FALSE)) {
681     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, DASH_CAPS);
682   }
683 }
684 
685 /*** application/xges ****************************************************/
686 
687 static GstStaticCaps xges_caps = GST_STATIC_CAPS ("application/xges");
688 
689 #define XGES_CAPS gst_static_caps_get (&xges_caps)
690 
691 static void
xges_type_find(GstTypeFind * tf,gpointer unused)692 xges_type_find (GstTypeFind * tf, gpointer unused)
693 {
694   if (xml_check_first_element (tf, "ges", 3, FALSE)) {
695     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, XGES_CAPS);
696   }
697 }
698 
699 /***application/vnd.apple-fcp+xml ****************************************************/
700 
701 static GstStaticCaps fcpxml_caps =
702 GST_STATIC_CAPS ("application/vnd.apple-fcp+xml");
703 
704 #define FCPXML_CAPS gst_static_caps_get (&fcpxml_caps)
705 
706 static void
fcpxml_type_find(GstTypeFind * tf,gpointer unused)707 fcpxml_type_find (GstTypeFind * tf, gpointer unused)
708 {
709   if (xml_check_first_element (tf, "fcpxml", 3, FALSE)) {
710     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FCPXML_CAPS);
711   }
712 }
713 
714 /*** application/vnd.apple-xmeml+xml ****************************************************/
715 
716 static GstStaticCaps xmeml_caps =
717 GST_STATIC_CAPS ("application/vnd.apple-xmeml+xml");
718 
719 #define XMEML_CAPS gst_static_caps_get (&xmeml_caps)
720 
721 static void
xmeml_type_find(GstTypeFind * tf,gpointer unused)722 xmeml_type_find (GstTypeFind * tf, gpointer unused)
723 {
724   if (xml_check_first_element (tf, "xmeml", 3, FALSE)) {
725     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, XMEML_CAPS);
726   }
727 }
728 
729 /*** application/otio ****************************************************/
730 
731 static GstStaticCaps otio_caps =
732 GST_STATIC_CAPS ("application/vnd.pixar.opentimelineio+json");
733 
734 #define OTIO_CAPS gst_static_caps_get (&otio_caps)
735 
736 static void
otio_type_find(GstTypeFind * tf,gpointer unused)737 otio_type_find (GstTypeFind * tf, gpointer unused)
738 {
739   const gchar *data, *tmp;
740 
741   data = (const gchar *) gst_type_find_peek (tf, 0, 30);
742   if (!data)
743     return;
744 
745   tmp = (const gchar *) memchr (data, '{', 30);
746   if (!tmp)
747     return;
748 
749   data = (const gchar *) gst_type_find_peek (tf, tmp - data, 30);
750   if (!data)
751     return;
752 
753   tmp = (const gchar *) memchr (data, '"', 30);
754   if (!tmp)
755     return;
756 
757   data = (const gchar *) gst_type_find_peek (tf, tmp - data, 14);
758   if (!data)
759     return;
760 
761   if (memcmp (data, "\"OTIO_SCHEMA\":", 14) == 0) {
762     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OTIO_CAPS);
763   }
764 }
765 
766 
767 /*** application/sdp *********************************************************/
768 
769 static GstStaticCaps sdp_caps = GST_STATIC_CAPS ("application/sdp");
770 
771 #define SDP_CAPS (gst_static_caps_get(&sdp_caps))
772 static gboolean
sdp_check_header(GstTypeFind * tf)773 sdp_check_header (GstTypeFind * tf)
774 {
775   const guint8 *data;
776 
777   data = gst_type_find_peek (tf, 0, 5);
778   if (!data)
779     return FALSE;
780 
781   /* sdp must start with v=0[\r]\n */
782   if (memcmp (data, "v=0", 3))
783     return FALSE;
784 
785   if (data[3] == '\r' && data[4] == '\n')
786     return TRUE;
787   if (data[3] == '\n')
788     return TRUE;
789 
790   return FALSE;
791 }
792 
793 static void
sdp_type_find(GstTypeFind * tf,gpointer unused)794 sdp_type_find (GstTypeFind * tf, gpointer unused)
795 {
796   if (sdp_check_header (tf))
797     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SDP_CAPS);
798 }
799 
800 /*** application/smil *********************************************************/
801 
802 static GstStaticCaps smil_caps = GST_STATIC_CAPS ("application/smil");
803 
804 #define SMIL_CAPS (gst_static_caps_get(&smil_caps))
805 static void
smil_type_find(GstTypeFind * tf,gpointer unused)806 smil_type_find (GstTypeFind * tf, gpointer unused)
807 {
808   if (xml_check_first_element (tf, "smil", 4, FALSE)) {
809     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SMIL_CAPS);
810   }
811 }
812 
813 /*** application/ttml+xml *****************************************************/
814 
815 static GstStaticCaps ttml_xml_caps = GST_STATIC_CAPS ("application/ttml+xml");
816 
817 #define TTML_XML_CAPS (gst_static_caps_get(&ttml_xml_caps))
818 static void
ttml_xml_type_find(GstTypeFind * tf,gpointer unused)819 ttml_xml_type_find (GstTypeFind * tf, gpointer unused)
820 {
821   if (xml_check_first_element (tf, "tt", 2, FALSE)) {
822     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTML_XML_CAPS);
823   }
824 }
825 
826 /*** text/html ***/
827 
828 static GstStaticCaps html_caps = GST_STATIC_CAPS ("text/html");
829 
830 #define HTML_CAPS gst_static_caps_get (&html_caps)
831 
832 static void
html_type_find(GstTypeFind * tf,gpointer unused)833 html_type_find (GstTypeFind * tf, gpointer unused)
834 {
835   const gchar *d, *data;
836 
837   data = (const gchar *) gst_type_find_peek (tf, 0, 16);
838   if (!data)
839     return;
840 
841   if (!g_ascii_strncasecmp (data, "<!DOCTYPE HTML", 14)) {
842     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
843   } else if (xml_check_first_element (tf, "html", 4, FALSE)) {
844     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
845   } else if ((d = memchr (data, '<', 16))) {
846     data = (const gchar *) gst_type_find_peek (tf, d - data, 6);
847     if (data && g_ascii_strncasecmp (data, "<html>", 6) == 0) {
848       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, HTML_CAPS);
849     }
850   }
851 }
852 
853 /*** audio/midi ***/
854 
855 static GstStaticCaps mid_caps = GST_STATIC_CAPS ("audio/midi");
856 
857 #define MID_CAPS gst_static_caps_get(&mid_caps)
858 static void
mid_type_find(GstTypeFind * tf,gpointer unused)859 mid_type_find (GstTypeFind * tf, gpointer unused)
860 {
861   const guint8 *data = gst_type_find_peek (tf, 0, 4);
862 
863   /* http://jedi.ks.uiuc.edu/~johns/links/music/midifile.html */
864   if (data && data[0] == 'M' && data[1] == 'T' && data[2] == 'h'
865       && data[3] == 'd')
866     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MID_CAPS);
867 }
868 
869 /*** audio/mobile-xmf ***/
870 
871 static GstStaticCaps mxmf_caps = GST_STATIC_CAPS ("audio/mobile-xmf");
872 
873 #define MXMF_CAPS gst_static_caps_get(&mxmf_caps)
874 static void
mxmf_type_find(GstTypeFind * tf,gpointer unused)875 mxmf_type_find (GstTypeFind * tf, gpointer unused)
876 {
877   const guint8 *data = NULL;
878 
879   /* Search FileId "XMF_" 4 bytes */
880   data = gst_type_find_peek (tf, 0, 4);
881   if (data && data[0] == 'X' && data[1] == 'M' && data[2] == 'F'
882       && data[3] == '_') {
883     /* Search Format version "2.00" 4 bytes */
884     data = gst_type_find_peek (tf, 4, 4);
885     if (data && data[0] == '2' && data[1] == '.' && data[2] == '0'
886         && data[3] == '0') {
887       /* Search TypeId 2     1 byte */
888       data = gst_type_find_peek (tf, 11, 1);
889       if (data && data[0] == 2) {
890         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MXMF_CAPS);
891       }
892     }
893   }
894 }
895 
896 
897 /*** video/x-fli ***/
898 
899 static GstStaticCaps flx_caps = GST_STATIC_CAPS ("video/x-fli");
900 
901 #define FLX_CAPS gst_static_caps_get(&flx_caps)
902 static void
flx_type_find(GstTypeFind * tf,gpointer unused)903 flx_type_find (GstTypeFind * tf, gpointer unused)
904 {
905   const guint8 *data = gst_type_find_peek (tf, 0, 134);
906 
907   if (data) {
908     /* check magic and the frame type of the first frame */
909     if ((data[4] == 0x11 || data[4] == 0x12 ||
910             data[4] == 0x30 || data[4] == 0x44) &&
911         data[5] == 0xaf &&
912         ((data[132] == 0x00 || data[132] == 0xfa) && data[133] == 0xf1)) {
913       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLX_CAPS);
914     }
915     return;
916   }
917   data = gst_type_find_peek (tf, 0, 6);
918   if (data) {
919     /* check magic only */
920     if ((data[4] == 0x11 || data[4] == 0x12 ||
921             data[4] == 0x30 || data[4] == 0x44) && data[5] == 0xaf) {
922       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, FLX_CAPS);
923     }
924     return;
925   }
926 }
927 
928 /*** application/x-id3 ***/
929 
930 static GstStaticCaps id3_caps = GST_STATIC_CAPS ("application/x-id3");
931 
932 #define ID3_CAPS gst_static_caps_get(&id3_caps)
933 static void
id3v2_type_find(GstTypeFind * tf,gpointer unused)934 id3v2_type_find (GstTypeFind * tf, gpointer unused)
935 {
936 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
937  * As a result, the duration value cannot be obtained in the preparation phase.
938  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
939  */
940 #ifdef OHOS_OPT_COMPAT
941   return;
942 #endif
943   const guint8 *data = gst_type_find_peek (tf, 0, 10);
944 
945   if (data && memcmp (data, "ID3", 3) == 0 &&
946       data[3] != 0xFF && data[4] != 0xFF &&
947       (data[6] & 0x80) == 0 && (data[7] & 0x80) == 0 &&
948       (data[8] & 0x80) == 0 && (data[9] & 0x80) == 0) {
949     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
950   }
951 }
952 
953 static void
id3v1_type_find(GstTypeFind * tf,gpointer unused)954 id3v1_type_find (GstTypeFind * tf, gpointer unused)
955 {
956 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
957  * As a result, the duration value cannot be obtained in the preparation phase.
958  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
959  */
960 #ifdef OHOS_OPT_COMPAT
961   return;
962 #endif
963   const guint8 *data = gst_type_find_peek (tf, -128, 3);
964 
965   if (data && memcmp (data, "TAG", 3) == 0) {
966     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, ID3_CAPS);
967   }
968 }
969 
970 #ifdef OHOS_OPT_COMPAT
971 /*
972  * ohos.opt.compat.0051
973  * wav audio stream recognition errors
974  */
975 #else
976 /*** application/x-ape ***/
977 
978 static GstStaticCaps apetag_caps = GST_STATIC_CAPS ("application/x-apetag");
979 
980 #define APETAG_CAPS gst_static_caps_get(&apetag_caps)
981 static void
apetag_type_find(GstTypeFind * tf,gpointer unused)982 apetag_type_find (GstTypeFind * tf, gpointer unused)
983 {
984   const guint8 *data;
985 
986   /* APEv1/2 at start of file */
987   data = gst_type_find_peek (tf, 0, 8);
988   if (data && !memcmp (data, "APETAGEX", 8)) {
989 #ifdef OHOS_OPT_COMPAT
990     /*
991      * ohos.opt.compat.0051
992      * wav audio stream recognition errors
993      */
994     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, APETAG_CAPS);
995 #else
996     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
997 #endif
998     return;
999   }
1000 
1001   /* APEv1/2 at end of file */
1002   data = gst_type_find_peek (tf, -32, 8);
1003   if (data && !memcmp (data, "APETAGEX", 8)) {
1004 #ifdef OHOS_OPT_COMPAT
1005     /*
1006      * ohos.opt.compat.0051
1007      * wav audio stream recognition errors
1008      */
1009     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, APETAG_CAPS);
1010 #else
1011     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, APETAG_CAPS);
1012 #endif
1013     return;
1014   }
1015 }
1016 #endif
1017 /*** audio/x-ttafile ***/
1018 
1019 static GstStaticCaps tta_caps = GST_STATIC_CAPS ("audio/x-ttafile");
1020 
1021 #define TTA_CAPS gst_static_caps_get(&tta_caps)
1022 static void
tta_type_find(GstTypeFind * tf,gpointer unused)1023 tta_type_find (GstTypeFind * tf, gpointer unused)
1024 {
1025   const guint8 *data = gst_type_find_peek (tf, 0, 3);
1026 
1027   if (data) {
1028     if (memcmp (data, "TTA", 3) == 0) {
1029       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TTA_CAPS);
1030       return;
1031     }
1032   }
1033 }
1034 
1035 /*** audio/x-flac ***/
1036 static GstStaticCaps flac_caps = GST_STATIC_CAPS ("audio/x-flac");
1037 
1038 #define FLAC_CAPS (gst_static_caps_get(&flac_caps))
1039 
1040 static void
flac_type_find(GstTypeFind * tf,gpointer unused)1041 flac_type_find (GstTypeFind * tf, gpointer unused)
1042 {
1043   DataScanCtx c = { 0, NULL, 0 };
1044 
1045   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
1046     return;
1047 
1048   /* standard flac (also old/broken flac-in-ogg with an initial 4-byte marker
1049    * packet and without the usual packet framing) */
1050   if (memcmp (c.data, "fLaC", 4) == 0) {
1051     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLAC_CAPS);
1052     return;
1053   }
1054 
1055   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
1056     return;
1057 
1058   /* flac-in-ogg, see http://flac.sourceforge.net/ogg_mapping.html */
1059   if (memcmp (c.data, "\177FLAC\001", 6) == 0) {
1060     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, FLAC_CAPS);
1061     return;
1062   }
1063 
1064 /* disabled because it happily typefinds /dev/urandom as audio/x-flac, and
1065  * because I yet have to see header-less flac in the wild */
1066 #if 0
1067   /* flac without headers (subset format) */
1068   /* 64K should be enough */
1069   while (c.offset < (64 * 1024)) {
1070     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
1071       break;
1072 
1073     /* look for frame header,
1074      * http://flac.sourceforge.net/format.html#frame_header
1075      */
1076     if (c.data[0] == 0xff && (c.data[1] >> 2) == 0x3e) {
1077       /* bit 15 in the header must be 0 */
1078       if (((c.data[1] >> 1) & 0x01) == 0x01)
1079         goto advance;
1080 
1081       /* blocksize must be != 0x00 */
1082       if ((c.data[2] >> 4) == 0x00)
1083         goto advance;
1084 
1085       /* samplerate must be != 0x0f */
1086       if ((c.data[2] & 0x0f) == 0x0f)
1087         goto advance;
1088       /* also 0 is invalid, as it means get the info from the header and we
1089        * don't have headers if we are here */
1090       if ((c.data[2] & 0x0f) == 0x00)
1091         goto advance;
1092 
1093       /* channel assignment must be < 11 */
1094       if ((c.data[3] >> 4) >= 11)
1095         goto advance;
1096 
1097       /* sample size must be != 0x07 and != 0x05 */
1098       if (((c.data[3] >> 1) & 0x07) == 0x07)
1099         goto advance;
1100       if (((c.data[3] >> 1) & 0x07) == 0x05)
1101         goto advance;
1102       /* also 0 is invalid, as it means get the info from the header and we
1103        * don't have headers if we are here */
1104       if (((c.data[3] >> 1) & 0x07) == 0x00)
1105         goto advance;
1106 
1107       /* next bit must be 0 */
1108       if ((c.data[3] & 0x01) == 0x01)
1109         goto advance;
1110 
1111       /* FIXME: shouldn't we include the crc check ? */
1112 
1113       GST_DEBUG ("Found flac without headers at %d", (gint) c.offset);
1114       gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, FLAC_CAPS);
1115       return;
1116     }
1117   advance:
1118     data_scan_ctx_advance (tf, &c, 1);
1119   }
1120 #endif
1121 }
1122 
1123 /* TODO: we could probably make a generic function for this.. */
1124 static gint
aac_type_find_scan_loas_frames_ep(GstTypeFind * tf,DataScanCtx * scan_ctx,gint max_frames)1125 aac_type_find_scan_loas_frames_ep (GstTypeFind * tf, DataScanCtx * scan_ctx,
1126     gint max_frames)
1127 {
1128   DataScanCtx c = *scan_ctx;
1129   guint16 snc;
1130   guint len;
1131   gint count = 0;
1132 
1133   do {
1134     if (!data_scan_ctx_ensure_data (tf, &c, 5))
1135       break;
1136 
1137     /* EPAudioSyncStream */
1138     len = ((c.data[2] & 0x0f) << 9) | (c.data[3] << 1) |
1139         ((c.data[4] & 0x80) >> 7);
1140 
1141     if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) {
1142       GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1143       break;
1144     }
1145 
1146     /* check length of frame  */
1147     snc = GST_READ_UINT16_BE (c.data + len);
1148     if (snc != 0x4de1) {
1149       GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len);
1150       break;
1151     }
1152 
1153     ++count;
1154 
1155     GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, "
1156         "framelen %u", count, c.offset, len);
1157 
1158     data_scan_ctx_advance (tf, &c, len);
1159   } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024);
1160 
1161   GST_DEBUG ("found %d consecutive frames", count);
1162   return count;
1163 }
1164 
1165 static gint
aac_type_find_scan_loas_frames(GstTypeFind * tf,DataScanCtx * scan_ctx,gint max_frames)1166 aac_type_find_scan_loas_frames (GstTypeFind * tf, DataScanCtx * scan_ctx,
1167     gint max_frames)
1168 {
1169   DataScanCtx c = *scan_ctx;
1170   guint16 snc;
1171   guint len;
1172   gint count = 0;
1173 
1174   do {
1175     if (!data_scan_ctx_ensure_data (tf, &c, 3))
1176       break;
1177 
1178     /* AudioSyncStream */
1179     len = ((c.data[1] & 0x1f) << 8) | c.data[2];
1180     /* add size of sync stream header */
1181     len += 3;
1182 
1183     if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) {
1184       GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1185       break;
1186     }
1187 
1188     /* check length of frame  */
1189     snc = GST_READ_UINT16_BE (c.data + len);
1190     if ((snc & 0xffe0) != 0x56e0) {
1191       GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len);
1192       break;
1193     }
1194 
1195     ++count;
1196 
1197     GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, "
1198         "framelen %u", count, c.offset, len);
1199 
1200     data_scan_ctx_advance (tf, &c, len);
1201   } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024);
1202 
1203   GST_DEBUG ("found %d consecutive frames", count);
1204   return count;
1205 }
1206 
1207 /*** audio/mpeg version 2, 4 ***/
1208 
1209 static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, "
1210     "mpegversion = (int) { 2, 4 }, framed = (bool) false");
1211 #define AAC_CAPS (gst_static_caps_get(&aac_caps))
1212 #define AAC_AMOUNT (4096)
1213 static void
aac_type_find(GstTypeFind * tf,gpointer unused)1214 aac_type_find (GstTypeFind * tf, gpointer unused)
1215 {
1216 /* ohos.opt.compat.0002: the demux of gstplayer does not accurately parse audio resources in the aac format.
1217  * As a result, the duration value cannot be obtained in the preparation phase.
1218  * Use the demux and typefind of ffmpeg to process audio resources in aac format.
1219  */
1220 #ifdef OHOS_OPT_COMPAT
1221   return;
1222 #endif
1223   DataScanCtx c = { 0, NULL, 0 };
1224   GstTypeFindProbability best_probability = GST_TYPE_FIND_NONE;
1225   GstCaps *best_caps = NULL;
1226   gint best_count = 0;
1227 
1228   while (c.offset < AAC_AMOUNT) {
1229     guint snc, len, offset, i;
1230 
1231     /* detect adts header or adif header.
1232      * The ADIF header is 4 bytes, that should be OK. The ADTS header, on
1233      * the other hand, is 14 bits only, so we require one valid frame with
1234      * again a valid syncpoint on the next one (28 bits) for certainty. We
1235      * require 4 kB, which is quite a lot, since frames are generally 200-400
1236      * bytes.
1237      * LOAS has 2 possible syncwords, which are 11 bits and 16 bits long.
1238      * The following stream syntax depends on which one is found.
1239      */
1240     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
1241       break;
1242 
1243     snc = GST_READ_UINT16_BE (c.data);
1244     if (G_UNLIKELY ((snc & 0xfff6) == 0xfff0)) {
1245       /* ADTS header - find frame length */
1246       GST_DEBUG ("Found one ADTS syncpoint at offset 0x%" G_GINT64_MODIFIER
1247           "x, tracing next...", c.offset);
1248       len = ((c.data[3] & 0x03) << 11) |
1249           (c.data[4] << 3) | ((c.data[5] & 0xe0) >> 5);
1250 
1251       if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 6)) {
1252         GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1253         goto next;
1254       }
1255 
1256       offset = len;
1257       /* check if there's a second ADTS frame */
1258       snc = GST_READ_UINT16_BE (c.data + offset);
1259       if ((snc & 0xfff6) == 0xfff0) {
1260         GstCaps *caps;
1261         guint mpegversion, sample_freq_idx, channel_config, profile_idx, rate;
1262         guint8 audio_config[2];
1263 
1264         mpegversion = (c.data[1] & 0x08) ? 2 : 4;
1265         profile_idx = c.data[2] >> 6;
1266         sample_freq_idx = ((c.data[2] & 0x3c) >> 2);
1267         channel_config = ((c.data[2] & 0x01) << 2) + (c.data[3] >> 6);
1268 
1269         GST_DEBUG ("Found second ADTS-%d syncpoint at offset 0x%"
1270             G_GINT64_MODIFIER "x, framelen %u", mpegversion, c.offset, len);
1271 
1272         /* 0xd and 0xe are reserved. 0xf means the sample frequency is directly
1273          * specified in the header, but that's not allowed for ADTS */
1274         if (sample_freq_idx > 0xc) {
1275           GST_DEBUG ("Unexpected sample frequency index %d or wrong sync",
1276               sample_freq_idx);
1277           goto next;
1278         }
1279 
1280         rate = gst_codec_utils_aac_get_sample_rate_from_index (sample_freq_idx);
1281         GST_LOG ("ADTS: profile=%u, rate=%u", profile_idx, rate);
1282 
1283         /* The ADTS frame header is slightly different from the
1284          * AudioSpecificConfig defined for the MPEG-4 container, so we just
1285          * construct enough of it for getting the level here. */
1286         /* ADTS counts profiles from 0 instead of 1 to save bits */
1287         audio_config[0] = (profile_idx + 1) << 3;
1288         audio_config[0] |= (sample_freq_idx >> 1) & 0x7;
1289         audio_config[1] = (sample_freq_idx & 0x1) << 7;
1290         audio_config[1] |= (channel_config & 0xf) << 3;
1291 
1292         caps = gst_caps_new_simple ("audio/mpeg",
1293             "framed", G_TYPE_BOOLEAN, FALSE,
1294             "mpegversion", G_TYPE_INT, mpegversion,
1295             "stream-format", G_TYPE_STRING, "adts", NULL);
1296 
1297         gst_codec_utils_aac_caps_set_level_and_profile (caps, audio_config, 2);
1298 
1299         /* add rate and number of channels if we can */
1300         if (channel_config != 0 && channel_config <= 7) {
1301           const guint channels_map[] = { 0, 1, 2, 3, 4, 5, 6, 8 };
1302 
1303           gst_caps_set_simple (caps, "channels", G_TYPE_INT,
1304               channels_map[channel_config], "rate", G_TYPE_INT, rate, NULL);
1305         }
1306 
1307         /* length of the second ADTS frame */
1308         len = ((c.data[offset + 3] & 0x03) << 11) |
1309             (c.data[offset + 4] << 3) | ((c.data[offset + 5] & 0xe0) >> 5);
1310 
1311         if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, offset + len + 6)) {
1312           GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len);
1313           gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, caps);
1314         } else {
1315           offset += len;
1316           /* find more aac sync to select correctly */
1317           /* check if there's a third/fourth/fifth/sixth ADTS frame, if there is a sixth frame, set probability to maximum:100% */
1318           for (i = 3; i <= 6; i++) {
1319             len = ((c.data[offset + 3] & 0x03) << 11) |
1320                 (c.data[offset + 4] << 3) | ((c.data[offset + 5] & 0xe0) >> 5);
1321             if (len == 0
1322                 || !data_scan_ctx_ensure_data (tf, &c, offset + len + 6)) {
1323               GST_DEBUG ("Wrong sync or next frame not within reach, len=%u",
1324                   len);
1325               break;
1326             }
1327             snc = GST_READ_UINT16_BE (c.data + offset);
1328             if ((snc & 0xfff6) == 0xfff0) {
1329               GST_DEBUG ("Find %und Sync..probability is %u ", i,
1330                   GST_TYPE_FIND_LIKELY + 5 * (i - 2));
1331               offset += len;
1332             } else {
1333               break;
1334             }
1335           }
1336           gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY + 5 * (i - 3), caps);
1337 
1338         }
1339         gst_caps_unref (caps);
1340         break;
1341       }
1342 
1343       GST_DEBUG ("No next frame found... (should have been at 0x%x)", len);
1344     } else if (G_UNLIKELY ((snc & 0xffe0) == 0x56e0 || snc == 0x4de1)) {
1345       gint count;
1346 
1347       /* LOAS frame */
1348       GST_INFO ("Possible LOAS syncword at offset 0x%" G_GINT64_MODIFIER
1349           "x, scanning for more frames...", c.offset);
1350 
1351       if (snc == 0x4de1)
1352         count = aac_type_find_scan_loas_frames_ep (tf, &c, 20);
1353       else
1354         count = aac_type_find_scan_loas_frames (tf, &c, 20);
1355 
1356       if (count >= 3 && count > best_count) {
1357         gst_caps_replace (&best_caps, NULL);
1358         best_caps = gst_caps_new_simple ("audio/mpeg",
1359             "framed", G_TYPE_BOOLEAN, FALSE,
1360             "mpegversion", G_TYPE_INT, 4,
1361             "stream-format", G_TYPE_STRING, "loas", NULL);
1362         best_count = count;
1363         best_probability = GST_TYPE_FIND_POSSIBLE - 10 + count * 3;
1364         if (best_probability >= GST_TYPE_FIND_LIKELY)
1365           break;
1366       }
1367     } else if (!memcmp (c.data, "ADIF", 4)) {
1368       /* ADIF header */
1369       gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, "audio/mpeg",
1370           "framed", G_TYPE_BOOLEAN, FALSE, "mpegversion", G_TYPE_INT, 4,
1371           "stream-format", G_TYPE_STRING, "adif", NULL);
1372       break;
1373     }
1374 
1375   next:
1376 
1377     data_scan_ctx_advance (tf, &c, 1);
1378   }
1379 
1380   if (best_probability > GST_TYPE_FIND_NONE) {
1381     gst_type_find_suggest (tf, best_probability, best_caps);
1382     gst_caps_unref (best_caps);
1383   }
1384 }
1385 
1386 /*** audio/mpeg version 1 ***/
1387 
1388 /*
1389  * The chance that random data is identified as a valid mp3 header is 63 / 2^18
1390  * (0.024%) per try. This makes the function for calculating false positives
 *   1 - (1 - ((63 / 2^18) ^ GST_MP3_TYPEFIND_MIN_HEADERS)) ^ buffersize
1392  * This has the following probabilities of false positives:
1393  * datasize               MIN_HEADERS
1394  * (bytes)      1       2       3       4
1395  * 4096         62.6%    0.02%   0%      0%
1396  * 16384        98%      0.09%   0%      0%
1397  * 1 MiB       100%      5.88%   0%      0%
1398  * 1 GiB       100%    100%      1.44%   0%
1399  * 1 TiB       100%    100%    100%      0.35%
 * This means that the current choice (3 headers with, most of the time,
 * 4096 byte buffers) is pretty safe for now.
1402  *
1403  * The max. size of each frame is 1440 bytes, which means that for N frames to
1404  * be detected, we need 1440 * GST_MP3_TYPEFIND_MIN_HEADERS + 3 bytes of data.
1405  * Assuming we step into the stream right after the frame header, this
1406  * means we need 1440 * (GST_MP3_TYPEFIND_MIN_HEADERS + 1) - 1 + 3 bytes
1407  * of data (5762) to always detect any mp3.
1408  */
1409 
1410 static const guint mp3types_bitrates[2][3][16] =
1411     { {{0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448,},
1412     {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,},
1413     {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320,}},
1414 {{0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,},
1415     {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,},
1416     {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,}},
1417 };
1418 
1419 static const guint mp3types_freqs[3][3] = { {11025, 12000, 8000},
1420 {22050, 24000, 16000},
1421 {44100, 48000, 32000}
1422 };
1423 
1424 static inline guint
mp3_type_frame_length_from_header(guint32 header,guint * put_layer,guint * put_channels,guint * put_bitrate,guint * put_samplerate,gboolean * may_be_free_format,gint possible_free_framelen)1425 mp3_type_frame_length_from_header (guint32 header, guint * put_layer,
1426     guint * put_channels, guint * put_bitrate, guint * put_samplerate,
1427     gboolean * may_be_free_format, gint possible_free_framelen)
1428 {
1429   guint bitrate, layer, length, mode, samplerate, version, channels;
1430 
1431   if ((header & 0xffe00000) != 0xffe00000)
1432     return 0;
1433 
1434   /* we don't need extension, copyright, original or
1435    * emphasis for the frame length */
1436   header >>= 6;
1437 
1438   /* mode */
1439   mode = header & 0x3;
1440   header >>= 3;
1441 
1442   /* padding */
1443   length = header & 0x1;
1444   header >>= 1;
1445 
1446   /* sampling frequency */
1447   samplerate = header & 0x3;
1448   if (samplerate == 3)
1449     return 0;
1450   header >>= 2;
1451 
1452   /* bitrate index */
1453   bitrate = header & 0xF;
1454   if (bitrate == 0 && possible_free_framelen == -1) {
1455     GST_LOG ("Possibly a free format mp3 - signaling");
1456     *may_be_free_format = TRUE;
1457   }
1458   if (bitrate == 15 || (bitrate == 0 && possible_free_framelen == -1))
1459     return 0;
1460 
1461   /* ignore error correction, too */
1462   header >>= 5;
1463 
1464   /* layer */
1465   layer = 4 - (header & 0x3);
1466   if (layer == 4)
1467     return 0;
1468   header >>= 2;
1469 
1470   /* version 0=MPEG2.5; 2=MPEG2; 3=MPEG1 */
1471   version = header & 0x3;
1472   if (version == 1)
1473     return 0;
1474 
1475   /* lookup */
1476   channels = (mode == 3) ? 1 : 2;
1477   samplerate = mp3types_freqs[version > 0 ? version - 1 : 0][samplerate];
1478   if (bitrate == 0) {
1479     /* possible freeform mp3 */
1480     if (layer == 1) {
1481       length *= 4;
1482       length += possible_free_framelen;
1483       bitrate = length * samplerate / 48000;
1484     } else {
1485       length += possible_free_framelen;
1486       bitrate = length * samplerate /
1487           ((layer == 3 && version != 3) ? 72000 : 144000);
1488     }
1489     /* freeform mp3 should have a higher-than-usually-allowed bitrate */
1490     GST_LOG ("calculated bitrate: %u, max usually: %u", bitrate,
1491         mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][14]);
1492     if (bitrate < mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][14])
1493       return 0;
1494   } else {
1495     /* calculating */
1496     bitrate = mp3types_bitrates[version == 3 ? 0 : 1][layer - 1][bitrate];
1497     if (layer == 1) {
1498       length = ((12000 * bitrate / samplerate) + length) * 4;
1499     } else {
1500       length += ((layer == 3
1501               && version != 3) ? 72000 : 144000) * bitrate / samplerate;
1502     }
1503   }
1504 
1505   GST_LOG ("mp3typefind: calculated mp3 frame length of %u bytes", length);
1506   GST_LOG
1507       ("mp3typefind: samplerate = %u - bitrate = %u - layer = %u - version = %u"
1508       " - channels = %u", samplerate, bitrate, layer, version, channels);
1509 
1510   if (put_layer)
1511     *put_layer = layer;
1512   if (put_channels)
1513     *put_channels = channels;
1514   if (put_bitrate)
1515     *put_bitrate = bitrate;
1516   if (put_samplerate)
1517     *put_samplerate = samplerate;
1518 
1519   return length;
1520 }
1521 
1522 
1523 static GstStaticCaps mp3_caps = GST_STATIC_CAPS ("audio/mpeg, "
1524     "mpegversion = (int) 1, layer = (int) [ 1, 3 ]");
1525 #define MP3_CAPS (gst_static_caps_get(&mp3_caps))
1526 /*
1527  * random values for typefinding
1528  * if no more data is available, we will return a probability of
1529  * (found_headers/TRY_HEADERS) * (MAXIMUM * (TRY_SYNC - bytes_skipped)
1530  *        / TRY_SYNC)
1531  * if found_headers >= MIN_HEADERS
1532  */
1533 #define GST_MP3_TYPEFIND_MIN_HEADERS (2)
1534 #define GST_MP3_TYPEFIND_TRY_HEADERS (5)
1535 #define GST_MP3_TYPEFIND_TRY_SYNC (GST_TYPE_FIND_MAXIMUM * 100) /* 10kB */
1536 #define GST_MP3_TYPEFIND_SYNC_SIZE (2048)
1537 #define GST_MP3_WRONG_HEADER (10)
1538 
1539 static void
mp3_type_find_at_offset(GstTypeFind * tf,guint64 start_off,guint * found_layer,GstTypeFindProbability * found_prob)1540 mp3_type_find_at_offset (GstTypeFind * tf, guint64 start_off,
1541     guint * found_layer, GstTypeFindProbability * found_prob)
1542 {
1543   const guint8 *data = NULL;
1544   const guint8 *data_end = NULL;
1545   guint size;
1546   guint64 skipped;
1547   gint last_free_offset = -1;
1548   gint last_free_framelen = -1;
1549   gboolean headerstart = TRUE;
1550 
1551   *found_layer = 0;
1552   *found_prob = 0;
1553 
1554   size = 0;
1555   skipped = 0;
1556   while (skipped < GST_MP3_TYPEFIND_TRY_SYNC) {
1557     if (size <= 0) {
1558       size = GST_MP3_TYPEFIND_SYNC_SIZE * 2;
1559       do {
1560         size /= 2;
1561         data = gst_type_find_peek (tf, skipped + start_off, size);
1562       } while (size > 10 && !data);
1563       if (!data)
1564         break;
1565       data_end = data + size;
1566     }
1567     if (*data == 0xFF) {
1568       const guint8 *head_data = NULL;
1569       guint layer = 0, bitrate, samplerate, channels;
1570       guint found = 0;          /* number of valid headers found */
1571       guint64 offset = skipped;
1572       gboolean changed = FALSE;
1573       guint prev_layer = 0;
1574       guint prev_channels = 0, prev_samplerate = 0;
1575 
1576       while (found < GST_MP3_TYPEFIND_TRY_HEADERS) {
1577         guint32 head;
1578         guint length;
1579         gboolean free = FALSE;
1580 
1581         if ((gint64) (offset - skipped + 4) >= 0 &&
1582             data + offset - skipped + 4 < data_end) {
1583           head_data = data + offset - skipped;
1584         } else {
1585           head_data = gst_type_find_peek (tf, offset + start_off, 4);
1586         }
1587         if (!head_data)
1588           break;
1589         head = GST_READ_UINT32_BE (head_data);
1590         if (!(length = mp3_type_frame_length_from_header (head, &layer,
1591                     &channels, &bitrate, &samplerate, &free,
1592                     last_free_framelen))) {
1593           if (free) {
1594             if (last_free_offset == -1)
1595               last_free_offset = offset;
1596             else {
1597               last_free_framelen = offset - last_free_offset;
1598               offset = last_free_offset;
1599               continue;
1600             }
1601           } else {
1602             last_free_framelen = -1;
1603           }
1604 
1605           /* Mark the fact that we didn't find a valid header at the beginning */
1606           if (found == 0)
1607             headerstart = FALSE;
1608 
1609           GST_LOG ("%d. header at offset %" G_GUINT64_FORMAT
1610               " (0x%" G_GINT64_MODIFIER "x) was not an mp3 header "
1611               "(possibly-free: %s)", found + 1, start_off + offset,
1612               start_off + offset, free ? "yes" : "no");
1613           break;
1614         }
1615         if ((prev_layer && prev_layer != layer) ||
1616             /* (prev_bitrate && prev_bitrate != bitrate) || <-- VBR */
1617             (prev_samplerate && prev_samplerate != samplerate) ||
1618             (prev_channels && prev_channels != channels)) {
1619           /* this means an invalid property, or a change, which might mean
1620            * that this is not a mp3 but just a random bytestream. It could
1621            * be a freaking funky encoded mp3 though. We'll just not count
1622            * this header*/
1623           if (prev_layer)
1624             changed = TRUE;
1625         } else {
1626           found++;
1627           GST_LOG ("found %d. header at offset %" G_GUINT64_FORMAT " (0x%"
1628               G_GINT64_MODIFIER "X)", found, start_off + offset,
1629               start_off + offset);
1630         }
1631         prev_layer = layer;
1632         prev_channels = channels;
1633         prev_samplerate = samplerate;
1634 
1635         offset += length;
1636       }
1637       g_assert (found <= GST_MP3_TYPEFIND_TRY_HEADERS);
1638       if (found != 0 && head_data == NULL &&
1639           gst_type_find_peek (tf, offset + start_off - 1, 1) == NULL)
1640         /* Incomplete last frame - don't count it. */
1641         found--;
1642       if (found == GST_MP3_TYPEFIND_TRY_HEADERS ||
1643           (found >= GST_MP3_TYPEFIND_MIN_HEADERS && head_data == NULL)) {
1644         /* we can make a valid guess */
1645         guint probability = found * GST_TYPE_FIND_MAXIMUM *
1646             (GST_MP3_TYPEFIND_TRY_SYNC - skipped) /
1647             GST_MP3_TYPEFIND_TRY_HEADERS / GST_MP3_TYPEFIND_TRY_SYNC;
1648 
1649         if (!headerstart
1650             && probability > (GST_TYPE_FIND_MINIMUM + GST_MP3_WRONG_HEADER))
1651           probability -= GST_MP3_WRONG_HEADER;
1652         if (probability < GST_TYPE_FIND_MINIMUM)
1653           probability = GST_TYPE_FIND_MINIMUM;
1654         if (start_off > 0)
1655           probability /= 2;
1656         if (!changed)
1657           probability = (probability + GST_TYPE_FIND_MAXIMUM) / 2;
1658 
1659         GST_INFO
1660             ("audio/mpeg calculated %u  =  %u  *  %u / %u  *  (%u - %"
1661             G_GUINT64_FORMAT ") / %u", probability, GST_TYPE_FIND_MAXIMUM,
1662             found, GST_MP3_TYPEFIND_TRY_HEADERS, GST_MP3_TYPEFIND_TRY_SYNC,
1663             (guint64) skipped, GST_MP3_TYPEFIND_TRY_SYNC);
1664         /* make sure we're not id3 tagged */
1665         head_data = gst_type_find_peek (tf, -128, 3);
1666         if (head_data && (memcmp (head_data, "TAG", 3) == 0)) {
1667           probability = 0;
1668         }
1669         g_assert (probability <= GST_TYPE_FIND_MAXIMUM);
1670 
1671         *found_prob = probability;
1672         if (probability > 0)
1673           *found_layer = layer;
1674         return;
1675       }
1676     }
1677     data++;
1678     skipped++;
1679     size--;
1680   }
1681 }
1682 
1683 static void
mp3_type_find(GstTypeFind * tf,gpointer unused)1684 mp3_type_find (GstTypeFind * tf, gpointer unused)
1685 {
1686 /* ohos.opt.compat.0001: The demux of gstplayer does not accurately parse audio resources in the MP3 format.
1687  * As a result, the duration value cannot be obtained in the preparation phase.
1688  * Use the demux and typefind of ffmpeg to process audio resources in MP3 format.
1689  */
1690 #ifdef OHOS_OPT_COMPAT
1691   return;
1692 #endif
1693   GstTypeFindProbability prob, mid_prob;
1694   const guint8 *data;
1695   guint layer, mid_layer;
1696   guint64 length;
1697 
1698   mp3_type_find_at_offset (tf, 0, &layer, &prob);
1699   length = gst_type_find_get_length (tf);
1700 
1701   if (length == 0 || length == (guint64) - 1) {
1702     if (prob != 0)
1703       goto suggest;
1704     return;
1705   }
1706 
1707   /* if we're pretty certain already, skip the additional check */
1708   if (prob >= GST_TYPE_FIND_LIKELY)
1709     goto suggest;
1710 
1711   mp3_type_find_at_offset (tf, length / 2, &mid_layer, &mid_prob);
1712 
1713   if (mid_prob > 0) {
1714     if (prob == 0) {
1715       GST_LOG ("detected audio/mpeg only in the middle (p=%u)", mid_prob);
1716       layer = mid_layer;
1717       prob = mid_prob;
1718       goto suggest;
1719     }
1720 
1721     if (layer != mid_layer) {
1722       GST_WARNING ("audio/mpeg layer discrepancy: %u vs. %u", layer, mid_layer);
1723       return;                   /* FIXME: or should we just go with the one in the middle? */
1724     }
1725 
1726     /* detected mpeg audio both in middle of the file and at the start */
1727     prob = (prob + mid_prob) / 2;
1728     goto suggest;
1729   }
1730 
1731   /* a valid header right at the start makes it more likely
1732    * that this is actually plain mpeg-1 audio */
1733   if (prob > 0) {
1734     data = gst_type_find_peek (tf, 0, 4);       /* use min. frame size? */
1735     if (data && mp3_type_frame_length_from_header (GST_READ_UINT32_BE (data),
1736             &layer, NULL, NULL, NULL, NULL, 0) != 0) {
1737       prob = MIN (prob + 10, GST_TYPE_FIND_MAXIMUM);
1738     }
1739   }
1740 
1741   if (prob > 0)
1742     goto suggest;
1743 
1744   return;
1745 
1746 suggest:
1747   {
1748     g_return_if_fail (layer >= 1 && layer <= 3);
1749 
1750     gst_type_find_suggest_simple (tf, prob, "audio/mpeg",
1751         "mpegversion", G_TYPE_INT, 1, "layer", G_TYPE_INT, layer,
1752         "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
1753   }
1754 }
1755 
1756 /*** audio/x-musepack ***/
1757 
1758 static GstStaticCaps musepack_caps =
1759 GST_STATIC_CAPS ("audio/x-musepack, streamversion= (int) { 7, 8 }");
1760 
1761 #define MUSEPACK_CAPS (gst_static_caps_get(&musepack_caps))
1762 static void
musepack_type_find(GstTypeFind * tf,gpointer unused)1763 musepack_type_find (GstTypeFind * tf, gpointer unused)
1764 {
1765   const guint8 *data = gst_type_find_peek (tf, 0, 4);
1766   GstTypeFindProbability prop = GST_TYPE_FIND_MINIMUM;
1767   gint streamversion = -1;
1768 
1769   if (data && memcmp (data, "MP+", 3) == 0) {
1770     streamversion = 7;
1771     if ((data[3] & 0x7f) == 7) {
1772       prop = GST_TYPE_FIND_MAXIMUM;
1773     } else {
1774       prop = GST_TYPE_FIND_LIKELY + 10;
1775     }
1776   } else if (data && memcmp (data, "MPCK", 4) == 0) {
1777     streamversion = 8;
1778     prop = GST_TYPE_FIND_MAXIMUM;
1779   }
1780 
1781   if (streamversion != -1) {
1782     gst_type_find_suggest_simple (tf, prop, "audio/x-musepack",
1783         "streamversion", G_TYPE_INT, streamversion, NULL);
1784   }
1785 }
1786 
1787 /*** audio/x-ac3 ***/
1788 /* FIXME 0.11: should be audio/ac3, but isn't for backwards compatibility */
1789 static GstStaticCaps ac3_caps = GST_STATIC_CAPS ("audio/x-ac3");
1790 
1791 #define AC3_CAPS (gst_static_caps_get(&ac3_caps))
1792 
1793 static GstStaticCaps eac3_caps = GST_STATIC_CAPS ("audio/x-eac3");
1794 
1795 #define EAC3_CAPS (gst_static_caps_get(&eac3_caps))
1796 
/* One row of the AC-3 frame size table.
 *
 * bit_rate: nominal bit rate of the row
 * frm_size: frame size for each of the three valid fscod values; the
 *           typefinder multiplies this value by two to get a byte distance
 *           (presumably 16-bit words as in ATSC A/52 - TODO confirm)
 */
struct ac3_frmsize
{
  unsigned short bit_rate;
  unsigned short frm_size[3];
};

/* Indexed by frmsizecod (0..37); every bit rate occupies two consecutive
 * rows that differ only in the middle (fscod == 1) column. */
static const struct ac3_frmsize ac3_frmsizecod_tbl[] = {
  /* frmsizecod  0 ..  1 */ {32, {64, 69, 96}}, {32, {64, 70, 96}},
  /* frmsizecod  2 ..  3 */ {40, {80, 87, 120}}, {40, {80, 88, 120}},
  /* frmsizecod  4 ..  5 */ {48, {96, 104, 144}}, {48, {96, 105, 144}},
  /* frmsizecod  6 ..  7 */ {56, {112, 121, 168}}, {56, {112, 122, 168}},
  /* frmsizecod  8 ..  9 */ {64, {128, 139, 192}}, {64, {128, 140, 192}},
  /* frmsizecod 10 .. 11 */ {80, {160, 174, 240}}, {80, {160, 175, 240}},
  /* frmsizecod 12 .. 13 */ {96, {192, 208, 288}}, {96, {192, 209, 288}},
  /* frmsizecod 14 .. 15 */ {112, {224, 243, 336}}, {112, {224, 244, 336}},
  /* frmsizecod 16 .. 17 */ {128, {256, 278, 384}}, {128, {256, 279, 384}},
  /* frmsizecod 18 .. 19 */ {160, {320, 348, 480}}, {160, {320, 349, 480}},
  /* frmsizecod 20 .. 21 */ {192, {384, 417, 576}}, {192, {384, 418, 576}},
  /* frmsizecod 22 .. 23 */ {224, {448, 487, 672}}, {224, {448, 488, 672}},
  /* frmsizecod 24 .. 25 */ {256, {512, 557, 768}}, {256, {512, 558, 768}},
  /* frmsizecod 26 .. 27 */ {320, {640, 696, 960}}, {320, {640, 697, 960}},
  /* frmsizecod 28 .. 29 */ {384, {768, 835, 1152}}, {384, {768, 836, 1152}},
  /* frmsizecod 30 .. 31 */ {448, {896, 975, 1344}}, {448, {896, 976, 1344}},
  /* frmsizecod 32 .. 33 */ {512, {1024, 1114, 1536}}, {512, {1024, 1115, 1536}},
  /* frmsizecod 34 .. 35 */ {576, {1152, 1253, 1728}}, {576, {1152, 1254, 1728}},
  /* frmsizecod 36 .. 37 */ {640, {1280, 1393, 1920}}, {640, {1280, 1394, 1920}}
};
1843 
1844 static void
ac3_type_find(GstTypeFind * tf,gpointer unused)1845 ac3_type_find (GstTypeFind * tf, gpointer unused)
1846 {
1847   DataScanCtx c = { 0, NULL, 0 };
1848 
1849   /* Search for an ac3 frame; not necessarily right at the start, but give it
1850    * a lower probability if not found right at the start. Check that the
1851    * frame is followed by a second frame at the expected offset.
1852    * We could also check the two ac3 CRCs, but we don't do that right now */
1853   while (c.offset < 1024) {
1854     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 6)))
1855       break;
1856 
1857     if (c.data[0] == 0x0b && c.data[1] == 0x77) {
1858       guint bsid = c.data[5] >> 3;
1859 
1860       if (bsid <= 8) {
1861         /* ac3 */
1862         guint fscod = c.data[4] >> 6;
1863         guint frmsizecod = c.data[4] & 0x3f;
1864 
1865         if (fscod < 3 && frmsizecod < 38) {
1866           DataScanCtx c_next = c;
1867           guint frame_size;
1868 
1869           frame_size = ac3_frmsizecod_tbl[frmsizecod].frm_size[fscod];
1870           GST_LOG ("possible AC3 frame sync at offset %"
1871               G_GUINT64_FORMAT ", size=%u", c.offset, frame_size);
1872           if (data_scan_ctx_ensure_data (tf, &c_next, (frame_size * 2) + 5)) {
1873             data_scan_ctx_advance (tf, &c_next, frame_size * 2);
1874 
1875             if (c_next.data[0] == 0x0b && c_next.data[1] == 0x77) {
1876               fscod = c_next.data[4] >> 6;
1877               frmsizecod = c_next.data[4] & 0x3f;
1878 
1879               if (fscod < 3 && frmsizecod < 38) {
1880                 GstTypeFindProbability prob;
1881 
1882                 GST_LOG ("found second AC3 frame (size=%u), looks good",
1883                     ac3_frmsizecod_tbl[frmsizecod].frm_size[fscod]);
1884                 if (c.offset == 0)
1885                   prob = GST_TYPE_FIND_MAXIMUM;
1886                 else
1887                   prob = GST_TYPE_FIND_NEARLY_CERTAIN;
1888 
1889                 gst_type_find_suggest (tf, prob, AC3_CAPS);
1890                 return;
1891               }
1892             } else {
1893               GST_LOG ("no second AC3 frame found, false sync");
1894             }
1895           }
1896         }
1897       } else if (bsid <= 16 && bsid > 10) {
1898         /* eac3 */
1899         DataScanCtx c_next = c;
1900         guint frame_size;
1901 
1902         frame_size = (((c.data[2] & 0x07) << 8) + c.data[3]) + 1;
1903         GST_LOG ("possible E-AC3 frame sync at offset %"
1904             G_GUINT64_FORMAT ", size=%u", c.offset, frame_size);
1905         if (data_scan_ctx_ensure_data (tf, &c_next, (frame_size * 2) + 5)) {
1906           data_scan_ctx_advance (tf, &c_next, frame_size * 2);
1907 
1908           if (c_next.data[0] == 0x0b && c_next.data[1] == 0x77) {
1909             GstTypeFindProbability prob;
1910 
1911             GST_LOG ("found second E-AC3 frame, looks good");
1912             if (c.offset == 0)
1913               prob = GST_TYPE_FIND_MAXIMUM;
1914             else
1915               prob = GST_TYPE_FIND_NEARLY_CERTAIN;
1916 
1917             gst_type_find_suggest (tf, prob, EAC3_CAPS);
1918             return;
1919           } else {
1920             GST_LOG ("no second E-AC3 frame found, false sync");
1921           }
1922         }
1923       } else {
1924         GST_LOG ("invalid AC3 BSID: %u", bsid);
1925       }
1926     }
1927     data_scan_ctx_advance (tf, &c, 1);
1928   }
1929 }
1930 
1931 /*** audio/x-dts ***/
1932 static GstStaticCaps dts_caps = GST_STATIC_CAPS ("audio/x-dts");
1933 #define DTS_CAPS (gst_static_caps_get (&dts_caps))
1934 #define DTS_MIN_FRAMESIZE 96
1935 #define DTS_MAX_FRAMESIZE 18725 /* 16384*16/14 */
1936 
1937 static gboolean
dts_parse_frame_header(DataScanCtx * c,guint * frame_size,guint * sample_rate,guint * channels,guint * depth,guint * endianness)1938 dts_parse_frame_header (DataScanCtx * c, guint * frame_size,
1939     guint * sample_rate, guint * channels, guint * depth, guint * endianness)
1940 {
1941   static const int sample_rates[16] = { 0, 8000, 16000, 32000, 0, 0, 11025,
1942     22050, 44100, 0, 0, 12000, 24000, 48000, 96000, 192000
1943   };
1944   static const guint8 channels_table[16] = { 1, 2, 2, 2, 2, 3, 3, 4, 4, 5,
1945     6, 6, 6, 7, 8, 8
1946   };
1947   guint16 hdr[8];
1948   guint32 marker;
1949   guint num_blocks, chans, lfe, i;
1950 
1951   marker = GST_READ_UINT32_BE (c->data);
1952 
1953   /* raw big endian or 14-bit big endian */
1954   if (marker == 0x7FFE8001 || marker == 0x1FFFE800) {
1955     *endianness = G_BIG_ENDIAN;
1956     for (i = 0; i < G_N_ELEMENTS (hdr); ++i)
1957       hdr[i] = GST_READ_UINT16_BE (c->data + (i * sizeof (guint16)));
1958   } else
1959     /* raw little endian or 14-bit little endian */
1960   if (marker == 0xFE7F0180 || marker == 0xFF1F00E8) {
1961     *endianness = G_LITTLE_ENDIAN;
1962     for (i = 0; i < G_N_ELEMENTS (hdr); ++i)
1963       hdr[i] = GST_READ_UINT16_LE (c->data + (i * sizeof (guint16)));
1964   } else {
1965     return FALSE;
1966   }
1967 
1968   GST_LOG ("dts sync marker 0x%08x at offset %u", marker, (guint) c->offset);
1969 
1970   /* 14-bit mode */
1971   if (marker == 0x1FFFE800 || marker == 0xFF1F00E8) {
1972     if ((hdr[2] & 0xFFF0) != 0x07F0)
1973       return FALSE;
1974     /* discard top 2 bits (2 void), shift in 2 */
1975     hdr[0] = (hdr[0] << 2) | ((hdr[1] >> 12) & 0x0003);
1976     /* discard top 4 bits (2 void, 2 shifted into hdr[0]), shift in 4 etc. */
1977     hdr[1] = (hdr[1] << 4) | ((hdr[2] >> 10) & 0x000F);
1978     hdr[2] = (hdr[2] << 6) | ((hdr[3] >> 8) & 0x003F);
1979     hdr[3] = (hdr[3] << 8) | ((hdr[4] >> 6) & 0x00FF);
1980     hdr[4] = (hdr[4] << 10) | ((hdr[5] >> 4) & 0x03FF);
1981     hdr[5] = (hdr[5] << 12) | ((hdr[6] >> 2) & 0x0FFF);
1982     hdr[6] = (hdr[6] << 14) | ((hdr[7] >> 0) & 0x3FFF);
1983     g_assert (hdr[0] == 0x7FFE && hdr[1] == 0x8001);
1984     *depth = 14;
1985   } else {
1986     *depth = 16;
1987   }
1988 
1989   GST_LOG ("frame header: %04x%04x%04x%04x", hdr[2], hdr[3], hdr[4], hdr[5]);
1990 
1991   num_blocks = (hdr[2] >> 2) & 0x7F;
1992   *frame_size = (((hdr[2] & 0x03) << 12) | (hdr[3] >> 4)) + 1;
1993   chans = ((hdr[3] & 0x0F) << 2) | (hdr[4] >> 14);
1994   *sample_rate = sample_rates[(hdr[4] >> 10) & 0x0F];
1995   lfe = (hdr[5] >> 9) & 0x03;
1996 
1997   if (num_blocks < 5 || *frame_size < 96 || *sample_rate == 0)
1998     return FALSE;
1999 
2000   if (marker == 0x1FFFE800 || marker == 0xFF1F00E8)
2001     *frame_size = (*frame_size * 16) / 14;      /* FIXME: round up? */
2002 
2003   if (chans < G_N_ELEMENTS (channels_table))
2004     *channels = channels_table[chans] + ((lfe) ? 1 : 0);
2005   else
2006     *channels = 0;
2007 
2008   return TRUE;
2009 }
2010 
2011 static void
dts_type_find(GstTypeFind * tf,gpointer unused)2012 dts_type_find (GstTypeFind * tf, gpointer unused)
2013 {
2014   DataScanCtx c = { 0, NULL, 0 };
2015 
2016   /* Search for an dts frame; not necessarily right at the start, but give it
2017    * a lower probability if not found right at the start. Check that the
2018    * frame is followed by a second frame at the expected offset. */
2019   while (c.offset <= DTS_MAX_FRAMESIZE) {
2020     guint frame_size = 0, rate = 0, chans = 0, depth = 0, endianness = 0;
2021 
2022     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, DTS_MIN_FRAMESIZE)))
2023       return;
2024 
2025     if (G_UNLIKELY (dts_parse_frame_header (&c, &frame_size, &rate, &chans,
2026                 &depth, &endianness))) {
2027       GstTypeFindProbability prob;
2028       DataScanCtx next_c;
2029 
2030       prob = (c.offset == 0) ? GST_TYPE_FIND_LIKELY : GST_TYPE_FIND_POSSIBLE;
2031 
2032       /* check for second frame sync */
2033       next_c = c;
2034       data_scan_ctx_advance (tf, &next_c, frame_size);
2035       if (data_scan_ctx_ensure_data (tf, &next_c, 4)) {
2036         GST_LOG ("frame size: %u 0x%04x", frame_size, frame_size);
2037         GST_MEMDUMP ("second frame sync", next_c.data, 4);
2038         if (GST_READ_UINT32_BE (c.data) == GST_READ_UINT32_BE (next_c.data))
2039           prob = GST_TYPE_FIND_MAXIMUM;
2040       }
2041 
2042       if (chans > 0) {
2043         gst_type_find_suggest_simple (tf, prob, "audio/x-dts",
2044             "rate", G_TYPE_INT, rate, "channels", G_TYPE_INT, chans,
2045             "depth", G_TYPE_INT, depth, "endianness", G_TYPE_INT, endianness,
2046             "framed", G_TYPE_BOOLEAN, FALSE, NULL);
2047       } else {
2048         gst_type_find_suggest_simple (tf, prob, "audio/x-dts",
2049             "rate", G_TYPE_INT, rate, "depth", G_TYPE_INT, depth,
2050             "endianness", G_TYPE_INT, endianness,
2051             "framed", G_TYPE_BOOLEAN, FALSE, NULL);
2052       }
2053 
2054       return;
2055     }
2056 
2057     data_scan_ctx_advance (tf, &c, 1);
2058   }
2059 }
2060 
2061 /*** gsm ***/
2062 
2063 /* can only be detected by using the extension, in which case we use the default
2064  * GSM properties */
2065 static GstStaticCaps gsm_caps =
2066 GST_STATIC_CAPS ("audio/x-gsm, rate=8000, channels=1");
2067 
2068 #define GSM_CAPS (gst_static_caps_get(&gsm_caps))
2069 
2070 /*** wavpack ***/
2071 
2072 static GstStaticCaps wavpack_caps =
2073 GST_STATIC_CAPS ("audio/x-wavpack, framed = (boolean) false");
2074 
2075 #define WAVPACK_CAPS (gst_static_caps_get(&wavpack_caps))
2076 
2077 static GstStaticCaps wavpack_correction_caps =
2078 GST_STATIC_CAPS ("audio/x-wavpack-correction, framed = (boolean) false");
2079 
2080 #define WAVPACK_CORRECTION_CAPS (gst_static_caps_get(&wavpack_correction_caps))
2081 
2082 static void
wavpack_type_find(GstTypeFind * tf,gpointer unused)2083 wavpack_type_find (GstTypeFind * tf, gpointer unused)
2084 {
2085   GstTypeFindProbability base_prob = GST_TYPE_FIND_POSSIBLE;
2086   guint64 offset;
2087   guint32 blocksize;
2088   const guint8 *data;
2089   guint count_wv, count_wvc;
2090 
2091   data = gst_type_find_peek (tf, 0, 32);
2092   if (!data)
2093     return;
2094 
2095   if (data[0] != 'w' || data[1] != 'v' || data[2] != 'p' || data[3] != 'k')
2096     return;
2097 
2098   /* Note: wavpack blocks can be fairly large (easily 60-110k), possibly
2099    * larger than the max. limits imposed by certain typefinding elements
2100    * like id3demux or apedemux, so typefinding is most likely only going to
2101    * work in pull-mode */
2102   blocksize = GST_READ_UINT32_LE (data + 4);
2103   GST_LOG ("wavpack header, blocksize=0x%04x", blocksize);
2104   /* If bigger than maximum allowed blocksize, refuse */
2105   if (blocksize > 131072)
2106     return;
2107   count_wv = 0;
2108   count_wvc = 0;
2109   offset = 32;
2110   while (offset < 8 + blocksize) {
2111     guint32 sublen;
2112 
2113     /* get chunk header */
2114     GST_LOG ("peeking at chunk at offset 0x%04x", (guint) offset);
2115     data = gst_type_find_peek (tf, offset, 4);
2116     if (data == NULL)
2117       break;
2118     sublen = ((guint32) data[1]) << 1;
2119     if (data[0] & 0x80) {
2120       sublen |= (((guint32) data[2]) << 9) | (((guint32) data[3]) << 17);
2121       sublen += 1 + 3;          /* id + length */
2122     } else {
2123       sublen += 1 + 1;          /* id + length */
2124     }
2125     if (offset + sublen > 8 + blocksize) {
2126       GST_LOG ("chunk length too big (%u > %" G_GUINT64_FORMAT ")", sublen,
2127           blocksize - offset);
2128       break;
2129     }
2130     if ((data[0] & 0x20) == 0) {
2131       switch (data[0] & 0x0f) {
2132         case 0xa:              /* ID_WV_BITSTREAM  */
2133         case 0xc:              /* ID_WVX_BITSTREAM */
2134           ++count_wv;
2135           break;
2136         case 0xb:              /* ID_WVC_BITSTREAM */
2137           ++count_wvc;
2138           break;
2139         default:
2140           break;
2141       }
2142       if (count_wv >= 5 || count_wvc >= 5)
2143         break;
2144     }
2145     offset += sublen;
2146   }
2147 
2148   /* check for second block header */
2149   data = gst_type_find_peek (tf, 8 + blocksize, 4);
2150   if (data != NULL && memcmp (data, "wvpk", 4) == 0) {
2151     GST_DEBUG ("found second block sync");
2152     base_prob = GST_TYPE_FIND_LIKELY;
2153   }
2154 
2155   GST_DEBUG ("wvc=%d, wv=%d", count_wvc, count_wv);
2156 
2157   if (count_wvc > 0 && count_wvc > count_wv) {
2158     gst_type_find_suggest (tf,
2159         MIN (base_prob + 5 * count_wvc, GST_TYPE_FIND_NEARLY_CERTAIN),
2160         WAVPACK_CORRECTION_CAPS);
2161   } else if (count_wv > 0) {
2162     gst_type_find_suggest (tf,
2163         MIN (base_prob + 5 * count_wv, GST_TYPE_FIND_NEARLY_CERTAIN),
2164         WAVPACK_CAPS);
2165   }
2166 }
2167 
2168 /*** application/postscrip ***/
2169 static GstStaticCaps postscript_caps =
2170 GST_STATIC_CAPS ("application/postscript");
2171 
2172 #define POSTSCRIPT_CAPS (gst_static_caps_get(&postscript_caps))
2173 
2174 static void
postscript_type_find(GstTypeFind * tf,gpointer unused)2175 postscript_type_find (GstTypeFind * tf, gpointer unused)
2176 {
2177   const guint8 *data = gst_type_find_peek (tf, 0, 3);
2178   if (!data)
2179     return;
2180 
2181   if (data[0] == 0x04)
2182     data++;
2183   if (data[0] == '%' && data[1] == '!')
2184     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, POSTSCRIPT_CAPS);
2185 
2186 }
2187 
2188 /*** image/svg+xml ***/
2189 static GstStaticCaps svg_caps = GST_STATIC_CAPS ("image/svg+xml");
2190 
2191 #define SVG_CAPS (gst_static_caps_get(&svg_caps))
2192 
2193 static void
svg_type_find(GstTypeFind * tf,gpointer unused)2194 svg_type_find (GstTypeFind * tf, gpointer unused)
2195 {
2196   static const gchar svg_doctype[] = "!DOCTYPE svg";
2197   static const gchar svg_tag[] = "<svg";
2198   DataScanCtx c = { 0, NULL, 0 };
2199 
2200   while (c.offset <= 1024) {
2201     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 12)))
2202       break;
2203 
2204     if (memcmp (svg_doctype, c.data, 12) == 0) {
2205       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SVG_CAPS);
2206       return;
2207     } else if (memcmp (svg_tag, c.data, 4) == 0) {
2208       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, SVG_CAPS);
2209       return;
2210     }
2211     data_scan_ctx_advance (tf, &c, 1);
2212   }
2213 }
2214 
2215 /*** multipart/x-mixed-replace mimestream ***/
2216 
2217 static GstStaticCaps multipart_caps =
2218 GST_STATIC_CAPS ("multipart/x-mixed-replace");
2219 #define MULTIPART_CAPS gst_static_caps_get(&multipart_caps)
2220 
2221 /* multipart/x-mixed replace is:
2222  *   <maybe some whitespace>--<some ascii chars>[\r]\n
2223  *   <more ascii chars>[\r]\nContent-type:<more ascii>[\r]\n */
2224 static void
multipart_type_find(GstTypeFind * tf,gpointer unused)2225 multipart_type_find (GstTypeFind * tf, gpointer unused)
2226 {
2227   const guint8 *data;
2228   const guint8 *x;
2229 
2230 #define MULTIPART_MAX_BOUNDARY_OFFSET 16
2231   data = gst_type_find_peek (tf, 0, MULTIPART_MAX_BOUNDARY_OFFSET);
2232   if (!data)
2233     return;
2234 
2235   for (x = data;
2236       x - data < MULTIPART_MAX_BOUNDARY_OFFSET - 2 && g_ascii_isspace (*x);
2237       x++);
2238   if (x[0] != '-' || x[1] != '-')
2239     return;
2240 
2241   /* Could be okay, peek what should be enough for a complete header */
2242 #define MULTIPART_MAX_HEADER_SIZE 256
2243   data = gst_type_find_peek (tf, 0, MULTIPART_MAX_HEADER_SIZE);
2244   if (!data)
2245     return;
2246 
2247   for (x = data; x - data < MULTIPART_MAX_HEADER_SIZE - 14; x++) {
2248     if (!isascii (*x)) {
2249       return;
2250     }
2251     if (*x == '\n' &&
2252         !g_ascii_strncasecmp ("content-type:", (gchar *) x + 1, 13)) {
2253       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MULTIPART_CAPS);
2254       return;
2255     }
2256   }
2257 }
2258 
2259 /*** video/mpeg systemstream ***/
2260 static GstStaticCaps mpeg_sys_caps = GST_STATIC_CAPS ("video/mpeg, "
2261     "systemstream = (boolean) true, mpegversion = (int) [ 1, 2 ]");
2262 
2263 #define MPEG_SYS_CAPS gst_static_caps_get(&mpeg_sys_caps)
2264 #define IS_MPEG_HEADER(data) (G_UNLIKELY((((guint8 *)(data))[0] == 0x00) &&  \
2265                                          (((guint8 *)(data))[1] == 0x00) &&  \
2266                                          (((guint8 *)(data))[2] == 0x01)))
2267 
2268 #define IS_MPEG_PACK_CODE(b) ((b) == 0xBA)
2269 #define IS_MPEG_SYS_CODE(b) ((b) == 0xBB)
2270 #define IS_MPEG_PACK_HEADER(data)       (IS_MPEG_HEADER (data) &&            \
2271                                          IS_MPEG_PACK_CODE (((guint8 *)(data))[3]))
2272 
2273 #define IS_MPEG_PES_CODE(b) (((b) & 0xF0) == 0xE0 || ((b) & 0xF0) == 0xC0 || \
2274                              (b) >= 0xBC)
2275 #define IS_MPEG_PES_HEADER(data)        (IS_MPEG_HEADER (data) &&            \
2276                                          IS_MPEG_PES_CODE (((guint8 *)(data))[3]))
2277 
2278 #define MPEG2_MAX_PROBE_LENGTH (128 * 1024)     /* 128kB should be 64 packs of the
2279                                                  * most common 2kB pack size. */
2280 
2281 #define MPEG2_MIN_SYS_HEADERS 2
2282 #define MPEG2_MAX_SYS_HEADERS 5
2283 
2284 static gboolean
mpeg_sys_is_valid_pack(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2285 mpeg_sys_is_valid_pack (GstTypeFind * tf, const guint8 * data, guint len,
2286     guint * pack_size)
2287 {
2288   /* Check the pack header @ offset for validity, assuming that the 4 byte header
2289    * itself has already been checked. */
2290   guint8 stuff_len;
2291 
2292   if (len < 12)
2293     return FALSE;
2294 
2295   /* Check marker bits */
2296   if ((data[4] & 0xC4) == 0x44) {
2297     /* MPEG-2 PACK */
2298     if (len < 14)
2299       return FALSE;
2300 
2301     if ((data[6] & 0x04) != 0x04 ||
2302         (data[8] & 0x04) != 0x04 ||
2303         (data[9] & 0x01) != 0x01 || (data[12] & 0x03) != 0x03)
2304       return FALSE;
2305 
2306     stuff_len = data[13] & 0x07;
2307 
2308     /* Check the following header bytes, if we can */
2309     if ((14 + stuff_len + 4) <= len) {
2310       if (!IS_MPEG_HEADER (data + 14 + stuff_len))
2311         return FALSE;
2312     }
2313     if (pack_size)
2314       *pack_size = 14 + stuff_len;
2315     return TRUE;
2316   } else if ((data[4] & 0xF1) == 0x21) {
2317     /* MPEG-1 PACK */
2318     if ((data[6] & 0x01) != 0x01 ||
2319         (data[8] & 0x01) != 0x01 ||
2320         (data[9] & 0x80) != 0x80 || (data[11] & 0x01) != 0x01)
2321       return FALSE;
2322 
2323     /* Check the following header bytes, if we can */
2324     if ((12 + 4) <= len) {
2325       if (!IS_MPEG_HEADER (data + 12))
2326         return FALSE;
2327     }
2328     if (pack_size)
2329       *pack_size = 12;
2330     return TRUE;
2331   }
2332 
2333   return FALSE;
2334 }
2335 
2336 static gboolean
mpeg_sys_is_valid_pes(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2337 mpeg_sys_is_valid_pes (GstTypeFind * tf, const guint8 * data, guint len,
2338     guint * pack_size)
2339 {
2340   guint pes_packet_len;
2341 
2342   /* Check the PES header at the given position, assuming the header code itself
2343    * was already checked */
2344   if (len < 6)
2345     return FALSE;
2346 
2347   /* For MPEG Program streams, unbounded PES is not allowed, so we must have a
2348    * valid length present */
2349   pes_packet_len = GST_READ_UINT16_BE (data + 4);
2350   if (pes_packet_len == 0)
2351     return FALSE;
2352 
2353   /* Check the following header, if we can */
2354   if (6 + pes_packet_len + 4 <= len) {
2355     if (!IS_MPEG_HEADER (data + 6 + pes_packet_len))
2356       return FALSE;
2357   }
2358 
2359   if (pack_size)
2360     *pack_size = 6 + pes_packet_len;
2361   return TRUE;
2362 }
2363 
2364 static gboolean
mpeg_sys_is_valid_sys(GstTypeFind * tf,const guint8 * data,guint len,guint * pack_size)2365 mpeg_sys_is_valid_sys (GstTypeFind * tf, const guint8 * data, guint len,
2366     guint * pack_size)
2367 {
2368   guint sys_hdr_len;
2369 
2370   /* Check the System header at the given position, assuming the header code itself
2371    * was already checked */
2372   if (len < 6)
2373     return FALSE;
2374   sys_hdr_len = GST_READ_UINT16_BE (data + 4);
2375   if (sys_hdr_len < 6)
2376     return FALSE;
2377 
2378   /* Check the following header, if we can */
2379   if (6 + sys_hdr_len + 4 <= len) {
2380     if (!IS_MPEG_HEADER (data + 6 + sys_hdr_len))
2381       return FALSE;
2382   }
2383 
2384   if (pack_size)
2385     *pack_size = 6 + sys_hdr_len;
2386 
2387   return TRUE;
2388 }
2389 
2390 /* calculation of possibility to identify random data as mpeg systemstream:
2391  * bits that must match in header detection:            32 (or more)
2392  * chance that random data is identifed:                1/2^32
2393  * chance that MPEG2_MIN_PACK_HEADERS headers are identified:
2394  *       1/2^(32*MPEG2_MIN_PACK_HEADERS)
2395  * chance that this happens in MPEG2_MAX_PROBE_LENGTH bytes:
2396  *       1-(1+1/2^(32*MPEG2_MIN_PACK_HEADERS)^MPEG2_MAX_PROBE_LENGTH)
2397  * for current values:
2398  *       1-(1+1/2^(32*4)^101024)
2399  *       = <some_number>
2400  * Since we also check marker bits and pes packet lengths, this probability is a
2401  * very coarse upper bound.
2402  */
2403 static void
mpeg_sys_type_find(GstTypeFind * tf,gpointer unused)2404 mpeg_sys_type_find (GstTypeFind * tf, gpointer unused)
2405 {
2406   const guint8 *data, *data0, *first_sync, *end;
2407   gint mpegversion = 0;
2408   guint pack_headers = 0;
2409   guint pes_headers = 0;
2410   guint pack_size;
2411   guint since_last_sync = 0;
2412   guint32 sync_word = 0xffffffff;
2413   guint potential_headers = 0;
2414 
2415   G_STMT_START {
2416     gint len;
2417 
2418     len = MPEG2_MAX_PROBE_LENGTH;
2419 
2420     while (len >= 16) {
2421       data = gst_type_find_peek (tf, 0, 5 + len);
2422       if (data != NULL)
2423         break;
2424       len = len / 2;
2425     }
2426 
2427     if (!data)
2428       return;
2429 
2430     end = data + len;
2431   }
2432   G_STMT_END;
2433 
2434   data0 = data;
2435   first_sync = NULL;
2436 
2437   while (data < end) {
2438     sync_word <<= 8;
2439     if (sync_word == 0x00000100) {
2440       /* Found potential sync word */
2441       if (first_sync == NULL)
2442         first_sync = data - 3;
2443 
2444       if (since_last_sync > 4) {
2445         /* If more than 4 bytes since the last sync word, reset our counters,
2446          * as we're only interested in counting contiguous packets */
2447         pes_headers = pack_headers = 0;
2448       }
2449       pack_size = 0;
2450 
2451       potential_headers++;
2452       if (IS_MPEG_PACK_CODE (data[0])) {
2453         if ((data[1] & 0xC0) == 0x40) {
2454           /* MPEG-2 */
2455           mpegversion = 2;
2456         } else if ((data[1] & 0xF0) == 0x20) {
2457           mpegversion = 1;
2458         }
2459         if (mpegversion != 0 &&
2460             mpeg_sys_is_valid_pack (tf, data - 3, end - data + 3, &pack_size)) {
2461           pack_headers++;
2462         }
2463       } else if (IS_MPEG_PES_CODE (data[0])) {
2464         /* PES stream */
2465         if (mpeg_sys_is_valid_pes (tf, data - 3, end - data + 3, &pack_size)) {
2466           pes_headers++;
2467           if (mpegversion == 0)
2468             mpegversion = 2;
2469         }
2470       } else if (IS_MPEG_SYS_CODE (data[0])) {
2471         if (mpeg_sys_is_valid_sys (tf, data - 3, end - data + 3, &pack_size)) {
2472           pack_headers++;
2473         }
2474       }
2475 
2476       /* If we found a packet with a known size, skip the bytes in it and loop
2477        * around to check the next packet. */
2478       if (pack_size != 0) {
2479         data += pack_size - 3;
2480         sync_word = 0xffffffff;
2481         since_last_sync = 0;
2482         continue;
2483       }
2484     }
2485 
2486     sync_word |= data[0];
2487     since_last_sync++;
2488     data++;
2489 
2490     /* If we have found MAX headers, and *some* were pes headers (pack headers
2491      * are optional in an mpeg system stream) then return our high-probability
2492      * result */
2493     if (pes_headers > 0 && (pack_headers + pes_headers) > MPEG2_MAX_SYS_HEADERS)
2494       goto suggest;
2495   }
2496 
2497   /* If we at least saw MIN headers, and *some* were pes headers (pack headers
2498    * are optional in an mpeg system stream) then return a lower-probability
2499    * result */
2500   if (pes_headers > 0 && (pack_headers + pes_headers) > MPEG2_MIN_SYS_HEADERS)
2501     goto suggest;
2502 
2503   return;
2504 suggest:
2505   {
2506     guint prob;
2507 
2508     prob = GST_TYPE_FIND_POSSIBLE + (10 * (pack_headers + pes_headers));
2509     prob = MIN (prob, GST_TYPE_FIND_MAXIMUM);
2510 
2511     /* With the above test, we get into problems when we try to typefind
2512        a MPEG stream from a small amount of data, which can happen when
2513        we get data pushed from a HTTP source. We thus make a second test
2514        to give higher probability if all the potential headers were either
2515        pack or pes headers (ie, no potential header was unrecognized). */
2516     if (potential_headers == pack_headers + pes_headers) {
2517       GST_LOG ("Only %u headers, but all were recognized", potential_headers);
2518       prob += 10;
2519       prob = MIN (prob, GST_TYPE_FIND_MAXIMUM);
2520     }
2521 
2522     /* lower probability if the first packet wasn't right at the start */
2523     if (data0 != first_sync && prob >= 10)
2524       prob -= 10;
2525 
2526     GST_LOG ("Suggesting MPEG %d system stream, %d packs, %d pes, prob %u%%",
2527         mpegversion, pack_headers, pes_headers, prob);
2528 
2529     gst_type_find_suggest_simple (tf, prob, "video/mpeg",
2530         "systemstream", G_TYPE_BOOLEAN, TRUE,
2531         "mpegversion", G_TYPE_INT, mpegversion, NULL);
2532   }
2533 };
2534 
2535 /*** video/mpegts Transport Stream ***/
2536 static GstStaticCaps mpegts_caps = GST_STATIC_CAPS ("video/mpegts, "
2537     "systemstream = (boolean) true, packetsize = (int) [ 188, 208 ]");
2538 #define MPEGTS_CAPS gst_static_caps_get(&mpegts_caps)
2539 
2540 #define GST_MPEGTS_TYPEFIND_MIN_HEADERS 4
2541 #define GST_MPEGTS_TYPEFIND_MAX_HEADERS 10
2542 #define GST_MPEGTS_MAX_PACKET_SIZE 208
2543 #define GST_MPEGTS_TYPEFIND_SYNC_SIZE \
2544             (GST_MPEGTS_TYPEFIND_MIN_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
2545 #define GST_MPEGTS_TYPEFIND_MAX_SYNC \
2546             (GST_MPEGTS_TYPEFIND_MAX_HEADERS * GST_MPEGTS_MAX_PACKET_SIZE)
2547 #define GST_MPEGTS_TYPEFIND_SCAN_LENGTH \
2548             (GST_MPEGTS_TYPEFIND_MAX_SYNC * 4)
2549 
#define MPEGTS_HDR_SIZE 4
/* Check for sync byte, error_indicator == 0 and packet has payload.
 * Adaptation control field (data[3] & 0x30) may be zero for TS packets with
 * null PIDs. Still, these streams are valid TS streams (for null packets,
 * AFC is supposed to be 0x1, but the spec also says decoders should just
 * discard any packets with AFC = 0x00)
 *
 * data must point to at least MPEGTS_HDR_SIZE readable bytes. The argument
 * is parenthesised at every use so that expressions such as (p + 1) expand
 * correctly; it is evaluated several times, so it must not have side
 * effects. */
#define IS_MPEGTS_HEADER(data) ((data)[0] == 0x47 && \
                                ((data)[1] & 0x80) == 0x00 && \
                                (((data)[3] & 0x30) != 0x00 || \
                                (((data)[1] & 0x1f) == 0x1f && ((data)[2] & 0xff) == 0xff)))
2560 
2561 /* Helper function to search ahead at intervals of packet_size for mpegts
2562  * headers */
2563 static gint
mpeg_ts_probe_headers(GstTypeFind * tf,guint64 offset,gint packet_size)2564 mpeg_ts_probe_headers (GstTypeFind * tf, guint64 offset, gint packet_size)
2565 {
2566   /* We always enter this function having found at least one header already */
2567   gint found = 1;
2568   const guint8 *data = NULL;
2569 
2570   GST_LOG ("looking for mpeg-ts packets of size %u", packet_size);
2571   while (found < GST_MPEGTS_TYPEFIND_MAX_HEADERS) {
2572     offset += packet_size;
2573 
2574     data = gst_type_find_peek (tf, offset, MPEGTS_HDR_SIZE);
2575     if (data == NULL || !IS_MPEGTS_HEADER (data))
2576       return found;
2577 
2578     found++;
2579     GST_LOG ("mpeg-ts sync #%2d at offset %" G_GUINT64_FORMAT, found, offset);
2580   }
2581 
2582   return found;
2583 }
2584 
2585 /* Try and detect at least 4 packets in at most 10 packets worth of
2586  * data. Need to try several possible packet sizes */
2587 static void
mpeg_ts_type_find(GstTypeFind * tf,gpointer unused)2588 mpeg_ts_type_find (GstTypeFind * tf, gpointer unused)
2589 {
2590   /* TS packet sizes to test: normal, DVHS packet size and
2591    * FEC with 16 or 20 byte codes packet size. */
2592   const gint pack_sizes[] = { 188, 192, 204, 208 };
2593   const guint8 *data = NULL;
2594   guint size = 0;
2595   guint64 skipped = 0;
2596 
2597   while (skipped < GST_MPEGTS_TYPEFIND_SCAN_LENGTH) {
2598     if (size < MPEGTS_HDR_SIZE) {
2599       data = gst_type_find_peek (tf, skipped, GST_MPEGTS_TYPEFIND_SYNC_SIZE);
2600       if (!data)
2601         break;
2602       size = GST_MPEGTS_TYPEFIND_SYNC_SIZE;
2603     }
2604 
2605     /* Have at least MPEGTS_HDR_SIZE bytes at this point */
2606     if (IS_MPEGTS_HEADER (data)) {
2607       gsize p;
2608 
2609       GST_LOG ("possible mpeg-ts sync at offset %" G_GUINT64_FORMAT, skipped);
2610 
2611       for (p = 0; p < G_N_ELEMENTS (pack_sizes); p++) {
2612         gint found;
2613 
2614         /* Probe ahead at size pack_sizes[p] */
2615         found = mpeg_ts_probe_headers (tf, skipped, pack_sizes[p]);
2616         if (found >= GST_MPEGTS_TYPEFIND_MIN_HEADERS) {
2617           gint probability;
2618 
2619           /* found at least 4 headers. 10 headers = MAXIMUM probability.
2620            * Arbitrarily, I assigned 10% probability for each header we
2621            * found, 40% -> 100% */
2622           probability = MIN (10 * found, GST_TYPE_FIND_MAXIMUM);
2623 
2624           gst_type_find_suggest_simple (tf, probability, "video/mpegts",
2625               "systemstream", G_TYPE_BOOLEAN, TRUE,
2626               "packetsize", G_TYPE_INT, pack_sizes[p], NULL);
2627           return;
2628         }
2629       }
2630     }
2631     data++;
2632     skipped++;
2633     size--;
2634   }
2635 }
2636 
2637 #define GST_MPEGVID_TYPEFIND_TRY_PICTURES 6
2638 #define GST_MPEGVID_TYPEFIND_TRY_SYNC (100 * 1024)      /* 100 kB */
2639 
2640 /* Scan ahead a maximum of max_extra_offset bytes until the next IS_MPEG_HEADER
2641  * offset.  After the call, offset will be after the 0x000001, i.e. at the 4th
2642  * byte of the MPEG header.  Returns TRUE if a header was found, FALSE if not.
2643  */
2644 static gboolean
mpeg_find_next_header(GstTypeFind * tf,DataScanCtx * c,guint64 max_extra_offset)2645 mpeg_find_next_header (GstTypeFind * tf, DataScanCtx * c,
2646     guint64 max_extra_offset)
2647 {
2648   guint64 extra_offset;
2649 
2650   for (extra_offset = 0; extra_offset <= max_extra_offset; ++extra_offset) {
2651     if (!data_scan_ctx_ensure_data (tf, c, 4))
2652       return FALSE;
2653     if (IS_MPEG_HEADER (c->data)) {
2654       data_scan_ctx_advance (tf, c, 3);
2655       return TRUE;
2656     }
2657     data_scan_ctx_advance (tf, c, 1);
2658   }
2659   return FALSE;
2660 }
2661 
2662 /*** video/mpeg MPEG-4 elementary video stream ***/
2663 
2664 static GstStaticCaps mpeg4_video_caps = GST_STATIC_CAPS ("video/mpeg, "
2665     "systemstream=(boolean)false, mpegversion=4, parsed=(boolean)false");
2666 #define MPEG4_VIDEO_CAPS gst_static_caps_get(&mpeg4_video_caps)
2667 
2668 /*
2669  * This typefind is based on the elementary video header defined in
2670  * http://xhelmboyx.tripod.com/formats/mpeg-layout.txt
2671  * In addition, it allows the visual object sequence header to be
2672  * absent, and even the VOS header to be absent.  In the latter case,
2673  * a number of VOPs have to be present.
2674  */
2675 static void
mpeg4_video_type_find(GstTypeFind * tf,gpointer unused)2676 mpeg4_video_type_find (GstTypeFind * tf, gpointer unused)
2677 {
2678   DataScanCtx c = { 0, NULL, 0 };
2679   gboolean seen_vios_at_0 = FALSE;
2680   gboolean seen_vios = FALSE;
2681   gboolean seen_vos = FALSE;
2682   gboolean seen_vol = FALSE;
2683   guint num_vop_headers = 0;
2684   guint8 sc;
2685 
2686   while (c.offset < GST_MPEGVID_TYPEFIND_TRY_SYNC) {
2687     if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
2688       break;
2689 
2690     if (!mpeg_find_next_header (tf, &c,
2691             GST_MPEGVID_TYPEFIND_TRY_SYNC - c.offset))
2692       break;
2693 
2694     sc = c.data[0];
2695 
2696     /* visual_object_sequence_start_code */
2697     if (sc == 0xB0) {
2698       if (seen_vios)
2699         break;                  /* Terminate at second vios */
2700       if (c.offset == 0)
2701         seen_vios_at_0 = TRUE;
2702       seen_vios = TRUE;
2703       data_scan_ctx_advance (tf, &c, 2);
2704       if (!mpeg_find_next_header (tf, &c, 0))
2705         break;
2706 
2707       sc = c.data[0];
2708 
2709       /* Optional metadata */
2710       if (sc == 0xB2)
2711         if (!mpeg_find_next_header (tf, &c, 24))
2712           break;
2713     }
2714 
2715     /* visual_object_start_code (consider it optional) */
2716     if (sc == 0xB5) {
2717       data_scan_ctx_advance (tf, &c, 2);
2718       /* may contain ID marker and YUV clamping */
2719       if (!mpeg_find_next_header (tf, &c, 7))
2720         break;
2721 
2722       sc = c.data[0];
2723     }
2724 
2725     /* video_object_start_code */
2726     if (sc <= 0x1F) {
2727       if (seen_vos)
2728         break;                  /* Terminate at second vos */
2729       seen_vos = TRUE;
2730       data_scan_ctx_advance (tf, &c, 2);
2731       continue;
2732     }
2733 
2734     /* video_object_layer_start_code */
2735     if (sc >= 0x20 && sc <= 0x2F) {
2736       seen_vol = TRUE;
2737       data_scan_ctx_advance (tf, &c, 5);
2738       continue;
2739     }
2740 
2741     /* video_object_plane_start_code */
2742     if (sc == 0xB6) {
2743       num_vop_headers++;
2744       data_scan_ctx_advance (tf, &c, 2);
2745       continue;
2746     }
2747 
2748     /* Unknown start code. */
2749   }
2750 
2751   if (num_vop_headers > 0 || seen_vol) {
2752     GstTypeFindProbability probability = 0;
2753 
2754     GST_LOG ("Found %d pictures, vios: %d, vos:%d, vol:%d", num_vop_headers,
2755         seen_vios, seen_vos, seen_vol);
2756 
2757     if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vios_at_0
2758         && seen_vos && seen_vol)
2759       probability = GST_TYPE_FIND_MAXIMUM - 1;
2760     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vios
2761         && seen_vos && seen_vol)
2762       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 1;
2763     else if (seen_vios_at_0 && seen_vos && seen_vol)
2764       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 6;
2765     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vos
2766         && seen_vol)
2767       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 6;
2768     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_vol)
2769       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 9;
2770     else if (num_vop_headers >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
2771       probability = GST_TYPE_FIND_LIKELY - 1;
2772     else if (num_vop_headers > 2 && seen_vios && seen_vos && seen_vol)
2773       probability = GST_TYPE_FIND_LIKELY - 9;
2774     else if (seen_vios && seen_vos && seen_vol)
2775       probability = GST_TYPE_FIND_LIKELY - 20;
2776     else if (num_vop_headers > 0 && seen_vos && seen_vol)
2777       probability = GST_TYPE_FIND_POSSIBLE;
2778     else if (num_vop_headers > 0)
2779       probability = GST_TYPE_FIND_POSSIBLE - 10;
2780     else if (seen_vos && seen_vol)
2781 #ifdef OHOS_OPT_COMPAT
2782       /*
2783        * ohos.opt.compat.0015
2784        * mp3: mpeg audio stream is incorrectly identified as video: mpeg4 video stream,
2785        * which causes playback failure and lowers the score
2786        */
2787       probability = GST_TYPE_FIND_POSSIBLE - 40;
2788 #else
2789       probability = GST_TYPE_FIND_POSSIBLE - 20;
2790 #endif
2791     gst_type_find_suggest (tf, probability, MPEG4_VIDEO_CAPS);
2792   }
2793 }
2794 #ifdef OHOS_OPT_COMPAT
/*
 * ohos.opt.compat.0015
 * mp3: an MPEG audio stream can be incorrectly identified as a video/x-h263,
 * video/x-h264 or video/x-h265 video stream, which causes playback failure,
 * so these typefinders are not compiled when OHOS_OPT_COMPAT is defined.
 */
2800 #else
2801 /*** video/x-h263 H263 video stream ***/
2802 static GstStaticCaps h263_video_caps =
2803 GST_STATIC_CAPS ("video/x-h263, variant=(string)itu");
2804 
2805 #define H263_VIDEO_CAPS gst_static_caps_get(&h263_video_caps)
2806 
2807 #define H263_MAX_PROBE_LENGTH (128 * 1024)
2808 
2809 static void
h263_video_type_find(GstTypeFind * tf,gpointer unused)2810 h263_video_type_find (GstTypeFind * tf, gpointer unused)
2811 {
2812   DataScanCtx c = { 0, NULL, 0 };
2813   guint64 data = 0xffff;        /* prevents false positive for first 2 bytes */
2814   guint64 psc = 0;
2815   guint8 ptype = 0;
2816   guint format;
2817   guint good = 0;
2818   guint bad = 0;
2819   guint pc_type, pb_mode;
2820 
2821   while (c.offset < H263_MAX_PROBE_LENGTH) {
2822 #ifdef OHOS_OPT_COMPAT
2823     // ohos.opt.compat.0004
2824     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4))) {
2825       GST_INFO ("h263 need_typefind_again");
2826       tf->need_typefind_again = TRUE;
2827       break;
2828     }
2829 #else
2830     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
2831       break;
2832 #endif
2833 
2834     /* Find the picture start code */
2835     data = (data << 8) + c.data[0];
2836     psc = data & G_GUINT64_CONSTANT (0xfffffc0000);
2837     if (psc == 0x800000) {
2838       /* Found PSC */
2839       /* PTYPE */
2840       ptype = (data & 0x3fc) >> 2;
2841       /* Source Format */
2842       format = ptype & 0x07;
2843 
2844       /* Now that we have a Valid PSC, check if we also have a valid PTYPE and
2845          the Source Format, which should range between 1 and 5 */
2846       if (((ptype >> 6) == 0x2) && (format > 0 && format < 6)) {
2847         pc_type = data & 0x02;
2848         pb_mode = c.data[1] & 0x20 >> 4;
2849         if (!pc_type && pb_mode)
2850           bad++;
2851         else
2852           good++;
2853       } else
2854         bad++;
2855 
2856       /* FIXME: maybe bail out early if we get mostly bad syncs ? */
2857     }
2858 
2859     data_scan_ctx_advance (tf, &c, 1);
2860   }
2861 
2862   GST_LOG ("good: %d, bad: %d", good, bad);
2863 
2864 #ifdef OHOS_OPT_COMPAT
2865   // ohos.opt.compat.0004
2866   if (good > 2 * bad) {
2867     if (!tf->need_typefind_again) {
2868       gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H263_VIDEO_CAPS);
2869     }
2870   } else {
2871     tf->need_typefind_again = FALSE;
2872   }
2873 #else
2874   if (good > 2 * bad)
2875     gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H263_VIDEO_CAPS);
2876 #endif
2877 
2878   return;
2879 }
2880 /*** video/x-h264 H264 elementary video stream ***/
2881 
2882 static GstStaticCaps h264_video_caps =
2883 GST_STATIC_CAPS ("video/x-h264,stream-format=byte-stream");
2884 
2885 #define H264_VIDEO_CAPS gst_static_caps_get(&h264_video_caps)
2886 
2887 #define H264_MAX_PROBE_LENGTH (128 * 1024)      /* 128kB for HD should be enough. */
2888 
2889 static void
h264_video_type_find(GstTypeFind * tf,gpointer unused)2890 h264_video_type_find (GstTypeFind * tf, gpointer unused)
2891 {
2892   DataScanCtx c = { 0, NULL, 0 };
2893 
2894   /* Stream consists of: a series of sync codes (00 00 00 01) followed
2895    * by NALs
2896    */
2897   gboolean seen_idr = FALSE;
2898   gboolean seen_sps = FALSE;
2899   gboolean seen_pps = FALSE;
2900   gboolean seen_ssps = FALSE;
2901   int nut, ref;
2902   int good = 0;
2903   int bad = 0;
2904 
2905   while (c.offset < H264_MAX_PROBE_LENGTH) {
2906 #ifdef OHOS_OPT_COMPAT
2907     // ohos.opt.compat.0004
2908     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4))) {
2909       GST_INFO ("h264_video_type_find need_typefind_again");
2910       tf->need_typefind_again = TRUE;
2911       break;
2912     }
2913 #else
2914     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 4)))
2915       break;
2916 #endif
2917 
2918     if (IS_MPEG_HEADER (c.data)) {
2919       nut = c.data[3] & 0x9f;   /* forbiden_zero_bit | nal_unit_type */
2920       ref = c.data[3] & 0x60;   /* nal_ref_idc */
2921 
2922       /* if forbidden bit is different to 0 won't be h264 */
2923       if (nut > 0x1f) {
2924         bad++;
2925         break;
2926       }
2927 
2928       /* collect statistics about the NAL types */
2929       if ((nut >= 1 && nut <= 13) || nut == 19) {
2930         if ((nut == 5 && ref == 0) ||
2931             ((nut == 6 || (nut >= 9 && nut <= 12)) && ref != 0)) {
2932           bad++;
2933         } else {
2934           if (nut == 7)
2935             seen_sps = TRUE;
2936           else if (nut == 8)
2937             seen_pps = TRUE;
2938           else if (nut == 5)
2939             seen_idr = TRUE;
2940 
2941           good++;
2942         }
2943       } else if (nut >= 14 && nut <= 33) {
2944         if (nut == 15) {
2945           seen_ssps = TRUE;
2946           good++;
2947         } else if (nut == 14 || nut == 20) {
2948           /* Sometimes we see NAL 14 or 20 without SSPS
2949            * if dropped into the middle of a stream -
2950            * just ignore those (don't add to bad count) */
2951           if (seen_ssps)
2952             good++;
2953         } else {
2954           /* reserved */
2955           /* Theoretically these are good, since if they exist in the
2956              stream it merely means that a newer backwards-compatible
2957              h.264 stream.  But we should be identifying that separately. */
2958           bad++;
2959         }
2960       } else {
2961         /* unspecified, application specific */
2962         /* don't consider these bad */
2963       }
2964 
2965       GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps:%d", good, bad,
2966           seen_pps, seen_sps, seen_idr, seen_ssps);
2967 
2968       if (seen_sps && seen_pps && seen_idr && good >= 10 && bad < 4) {
2969         gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, H264_VIDEO_CAPS);
2970         return;
2971       }
2972 
2973       data_scan_ctx_advance (tf, &c, 4);
2974     }
2975     data_scan_ctx_advance (tf, &c, 1);
2976   }
2977 
2978   GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps=%d", good, bad,
2979       seen_pps, seen_sps, seen_idr, seen_ssps);
2980 
2981 #ifdef OHOS_OPT_COMPAT
2982   // ohos.opt.compat.0004
2983   if (good >= 2 && bad == 0) {
2984     GstTypeFindProbability probability = GST_TYPE_FIND_POSSIBLE;
2985 
2986     if (seen_pps && seen_sps)
2987       probability = GST_TYPE_FIND_LIKELY;
2988     if (!tf->need_typefind_again) {
2989       gst_type_find_suggest (tf, probability, H264_VIDEO_CAPS);
2990     }
2991   } else {
2992     tf->need_typefind_again = FALSE;
2993   }
2994 else
2995   if (good >= 2 && bad == 0) {
2996     GstTypeFindProbability probability = GST_TYPE_FIND_POSSIBLE;
2997 
2998     if (seen_pps && seen_sps)
2999       probability = GST_TYPE_FIND_LIKELY;
3000 
3001     gst_type_find_suggest (tf, probability, H264_VIDEO_CAPS);
3002   }
3003 #endif
3004 }
3005 
3006 /*** video/x-h265 H265 elementary video stream ***/
3007 
3008 static GstStaticCaps h265_video_caps =
3009 GST_STATIC_CAPS ("video/x-h265,stream-format=byte-stream");
3010 
3011 #define H265_VIDEO_CAPS gst_static_caps_get(&h265_video_caps)
3012 
3013 #define H265_MAX_PROBE_LENGTH (128 * 1024)      /* 128kB for HD should be enough. */
3014 
3015 static void
h265_video_type_find(GstTypeFind * tf,gpointer unused)3016 h265_video_type_find (GstTypeFind * tf, gpointer unused)
3017 {
3018   DataScanCtx c = { 0, NULL, 0 };
3019 
3020   /* Stream consists of: a series of sync codes (00 00 00 01) followed
3021    * by NALs
3022    */
3023   gboolean seen_irap = FALSE;
3024   gboolean seen_vps = FALSE;
3025   gboolean seen_sps = FALSE;
3026   gboolean seen_pps = FALSE;
3027   int nut;
3028   int good = 0;
3029   int bad = 0;
3030 
3031   while (c.offset < H265_MAX_PROBE_LENGTH) {
3032     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 5)))
3033       break;
3034 
3035     if (IS_MPEG_HEADER (c.data)) {
3036       /* forbiden_zero_bit | nal_unit_type */
3037       nut = c.data[3] & 0xfe;
3038 
3039       /* if forbidden bit is different to 0 won't be h265 */
3040       if (nut > 0x7e) {
3041         bad++;
3042         break;
3043       }
3044       nut = nut >> 1;
3045 
3046       /* if nuh_layer_id is not zero or nuh_temporal_id_plus1 is zero then
3047        * it won't be h265 */
3048       if ((c.data[3] & 0x01) || (c.data[4] & 0xf8) || !(c.data[4] & 0x07)) {
3049         bad++;
3050         break;
3051       }
3052 
3053       /* collect statistics about the NAL types */
3054       if ((nut >= 0 && nut <= 9) || (nut >= 16 && nut <= 21) || (nut >= 32
3055               && nut <= 40)) {
3056         if (nut == 32)
3057           seen_vps = TRUE;
3058         else if (nut == 33)
3059           seen_sps = TRUE;
3060         else if (nut == 34)
3061           seen_pps = TRUE;
3062         else if (nut >= 16 && nut <= 21) {
3063           /* BLA, IDR and CRA pictures are belongs to be IRAP picture */
3064           /* we are not counting the reserved IRAP pictures (22 and 23) to good */
3065           seen_irap = TRUE;
3066         }
3067 
3068         good++;
3069       } else if ((nut >= 10 && nut <= 15) || (nut >= 22 && nut <= 31)
3070           || (nut >= 41 && nut <= 47)) {
3071         /* reserved values are counting as bad */
3072         bad++;
3073       } else {
3074         /* unspecified (48..63), application specific */
3075         /* don't consider these as bad */
3076       }
3077 
3078       GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, vps:%d, irap:%d", good, bad,
3079           seen_pps, seen_sps, seen_vps, seen_irap);
3080 
3081       if (seen_sps && seen_pps && seen_irap && good >= 10 && bad < 4) {
3082         gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, H265_VIDEO_CAPS);
3083         return;
3084       }
3085 
3086       data_scan_ctx_advance (tf, &c, 5);
3087     }
3088     data_scan_ctx_advance (tf, &c, 1);
3089   }
3090 
3091   GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, vps:%d, irap:%d", good, bad,
3092       seen_pps, seen_sps, seen_vps, seen_irap);
3093 
3094   if (good >= 2 && bad == 0) {
3095     GstTypeFindProbability probability = GST_TYPE_FIND_POSSIBLE;
3096 
3097     if (seen_pps && seen_sps && seen_vps)
3098       probability = GST_TYPE_FIND_LIKELY;
3099 
3100     gst_type_find_suggest (tf, probability, H265_VIDEO_CAPS);
3101   }
3102 }
3103 #endif
3104 
3105 /*** video/mpeg video stream ***/
3106 
3107 static GstStaticCaps mpeg_video_caps = GST_STATIC_CAPS ("video/mpeg, "
3108     "systemstream = (boolean) false");
3109 #define MPEG_VIDEO_CAPS gst_static_caps_get(&mpeg_video_caps)
3110 
3111 /*
3112  * Idea is the same as MPEG system stream typefinding: We check each
3113  * byte of the stream to see if - from that point on - the stream
3114  * matches a predefined set of marker bits as defined in the MPEG
3115  * video specs.
3116  *
3117  * I'm sure someone will do a chance calculation here too.
3118  */
3119 
3120 static void
mpeg_video_stream_type_find(GstTypeFind * tf,gpointer unused)3121 mpeg_video_stream_type_find (GstTypeFind * tf, gpointer unused)
3122 {
3123   DataScanCtx c = { 0, NULL, 0 };
3124   gboolean seen_seq_at_0 = FALSE;
3125   gboolean seen_seq = FALSE;
3126   gboolean seen_gop = FALSE;
3127   guint64 last_pic_offset = 0;
3128   gint num_pic_headers = 0;
3129   gint found = 0;
3130 
3131   while (c.offset < GST_MPEGVID_TYPEFIND_TRY_SYNC) {
3132     if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
3133       break;
3134 
3135     if (!data_scan_ctx_ensure_data (tf, &c, 5))
3136       break;
3137 
3138     if (!IS_MPEG_HEADER (c.data))
3139       goto next;
3140 
3141     /* a pack header indicates that this isn't an elementary stream */
3142     if (c.data[3] == 0xBA && mpeg_sys_is_valid_pack (tf, c.data, c.size, NULL))
3143       return;
3144 
3145     /* do we have a sequence header? */
3146     if (c.data[3] == 0xB3) {
3147       seen_seq_at_0 = seen_seq_at_0 || (c.offset == 0);
3148       seen_seq = TRUE;
3149       data_scan_ctx_advance (tf, &c, 4 + 8);
3150       continue;
3151     }
3152 
3153     /* or a GOP header */
3154     if (c.data[3] == 0xB8) {
3155       seen_gop = TRUE;
3156       data_scan_ctx_advance (tf, &c, 8);
3157       continue;
3158     }
3159 
3160     /* but what we'd really like to see is a picture header */
3161     if (c.data[3] == 0x00) {
3162       ++num_pic_headers;
3163       last_pic_offset = c.offset;
3164       data_scan_ctx_advance (tf, &c, 8);
3165       continue;
3166     }
3167 
3168     /* ... each followed by a slice header with slice_vertical_pos=1 that's
3169      * not too far away from the previously seen picture header. */
3170     if (c.data[3] == 0x01 && num_pic_headers > found &&
3171         (c.offset - last_pic_offset) >= 4 &&
3172         (c.offset - last_pic_offset) <= 64) {
3173       data_scan_ctx_advance (tf, &c, 4);
3174       found += 1;
3175       continue;
3176     }
3177 
3178   next:
3179 
3180     data_scan_ctx_advance (tf, &c, 1);
3181   }
3182 
3183   if (found > 0 || seen_seq) {
3184     GstTypeFindProbability probability = 0;
3185 
3186     GST_LOG ("Found %d pictures, seq:%d, gop:%d", found, seen_seq, seen_gop);
3187 
3188     if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_seq && seen_gop)
3189       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 1;
3190     else if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES && seen_seq)
3191       probability = GST_TYPE_FIND_NEARLY_CERTAIN - 9;
3192     else if (found >= GST_MPEGVID_TYPEFIND_TRY_PICTURES)
3193       probability = GST_TYPE_FIND_LIKELY;
3194     else if (seen_seq_at_0 && seen_gop && found > 2)
3195       probability = GST_TYPE_FIND_LIKELY - 10;
3196     else if (seen_seq && seen_gop && found > 2)
3197       probability = GST_TYPE_FIND_LIKELY - 20;
3198     else if (seen_seq_at_0 && found > 0)
3199       probability = GST_TYPE_FIND_POSSIBLE;
3200     else if (seen_seq && found > 0)
3201       probability = GST_TYPE_FIND_POSSIBLE - 5;
3202 #ifdef OHOS_OPT_COMPAT
3203     /*
3204      * ohos.opt.compat.0015
3205      * mp3: mpeg audio stream is incorrectly identified as video: mpeg video stream,
3206      * which causes playback failure and lowers the score
3207      */
3208     else if (found > 0)
3209       probability = GST_TYPE_FIND_POSSIBLE - 40;
3210     else if (seen_seq)
3211       probability = GST_TYPE_FIND_POSSIBLE - 45;
3212 #else
3213     else if (found > 0)
3214       probability = GST_TYPE_FIND_POSSIBLE - 10;
3215     else if (seen_seq)
3216       probability = GST_TYPE_FIND_POSSIBLE - 20;
3217 #endif
3218 
3219     gst_type_find_suggest_simple (tf, probability, "video/mpeg",
3220         "systemstream", G_TYPE_BOOLEAN, FALSE,
3221         "mpegversion", G_TYPE_INT, 1, "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
3222   }
3223 }
3224 
3225 /*** audio/x-aiff ***/
3226 
3227 static GstStaticCaps aiff_caps = GST_STATIC_CAPS ("audio/x-aiff");
3228 
3229 #define AIFF_CAPS gst_static_caps_get(&aiff_caps)
3230 static void
aiff_type_find(GstTypeFind * tf,gpointer unused)3231 aiff_type_find (GstTypeFind * tf, gpointer unused)
3232 {
3233   const guint8 *data = gst_type_find_peek (tf, 0, 16);
3234 
3235   if (data && memcmp (data, "FORM", 4) == 0) {
3236     data += 8;
3237     if (memcmp (data, "AIFF", 4) == 0 || memcmp (data, "AIFC", 4) == 0)
3238       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, AIFF_CAPS);
3239   }
3240 }
3241 
3242 /*** audio/x-svx ***/
3243 
3244 static GstStaticCaps svx_caps = GST_STATIC_CAPS ("audio/x-svx");
3245 
3246 #define SVX_CAPS gst_static_caps_get(&svx_caps)
3247 static void
svx_type_find(GstTypeFind * tf,gpointer unused)3248 svx_type_find (GstTypeFind * tf, gpointer unused)
3249 {
3250   const guint8 *data = gst_type_find_peek (tf, 0, 16);
3251 
3252   if (data && memcmp (data, "FORM", 4) == 0) {
3253     data += 8;
3254     if (memcmp (data, "8SVX", 4) == 0 || memcmp (data, "16SV", 4) == 0)
3255       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SVX_CAPS);
3256   }
3257 }
3258 
3259 /*** audio/x-shorten ***/
3260 
3261 static GstStaticCaps shn_caps = GST_STATIC_CAPS ("audio/x-shorten");
3262 
3263 #define SHN_CAPS gst_static_caps_get(&shn_caps)
3264 static void
shn_type_find(GstTypeFind * tf,gpointer unused)3265 shn_type_find (GstTypeFind * tf, gpointer unused)
3266 {
3267   const guint8 *data = gst_type_find_peek (tf, 0, 4);
3268 
3269   if (data && memcmp (data, "ajkg", 4) == 0) {
3270     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SHN_CAPS);
3271   }
3272   data = gst_type_find_peek (tf, -8, 8);
3273   if (data && memcmp (data, "SHNAMPSK", 8) == 0) {
3274     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SHN_CAPS);
3275   }
3276 }
3277 
3278 /*** application/x-ape ***/
3279 
3280 static GstStaticCaps ape_caps = GST_STATIC_CAPS ("application/x-ape");
3281 
3282 #define APE_CAPS gst_static_caps_get(&ape_caps)
3283 static void
ape_type_find(GstTypeFind * tf,gpointer unused)3284 ape_type_find (GstTypeFind * tf, gpointer unused)
3285 {
3286   const guint8 *data = gst_type_find_peek (tf, 0, 4);
3287 
3288   if (data && memcmp (data, "MAC ", 4) == 0) {
3289     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY + 10, APE_CAPS);
3290   }
3291 }
3292 
3293 /*** ISO FORMATS ***/
3294 
3295 /*** audio/x-m4a ***/
3296 
3297 static GstStaticCaps m4a_caps = GST_STATIC_CAPS ("audio/x-m4a");
3298 
3299 #define M4A_CAPS (gst_static_caps_get(&m4a_caps))
3300 static void
m4a_type_find(GstTypeFind * tf,gpointer unused)3301 m4a_type_find (GstTypeFind * tf, gpointer unused)
3302 {
3303   const guint8 *data = gst_type_find_peek (tf, 4, 8);
3304 
3305   if (data && (memcmp (data, "ftypM4A ", 8) == 0)) {
3306     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, M4A_CAPS);
3307   }
3308 }
3309 
3310 /*** application/x-3gp ***/
3311 
3312 /* The Q is there because variables can't start with a number. */
3313 static GstStaticCaps q3gp_caps = GST_STATIC_CAPS ("application/x-3gp");
3314 #define Q3GP_CAPS (gst_static_caps_get(&q3gp_caps))
3315 
3316 static const gchar *
q3gp_type_find_get_profile(const guint8 * data)3317 q3gp_type_find_get_profile (const guint8 * data)
3318 {
3319   switch (GST_MAKE_FOURCC (data[0], data[1], data[2], 0)) {
3320     case GST_MAKE_FOURCC ('3', 'g', 'g', 0):
3321       return "general";
3322     case GST_MAKE_FOURCC ('3', 'g', 'p', 0):
3323       return "basic";
3324     case GST_MAKE_FOURCC ('3', 'g', 's', 0):
3325       return "streaming-server";
3326     case GST_MAKE_FOURCC ('3', 'g', 'r', 0):
3327       return "progressive-download";
3328     default:
3329       break;
3330   }
3331   return NULL;
3332 }
3333 
3334 static void
q3gp_type_find(GstTypeFind * tf,gpointer unused)3335 q3gp_type_find (GstTypeFind * tf, gpointer unused)
3336 {
3337   const gchar *profile;
3338   guint32 ftyp_size = 0;
3339   guint32 offset = 0;
3340   const guint8 *data = NULL;
3341 
3342   if ((data = gst_type_find_peek (tf, 0, 12)) == NULL) {
3343     return;
3344   }
3345 
3346   data += 4;
3347   if (memcmp (data, "ftyp", 4) != 0) {
3348     return;
3349   }
3350 
3351   /* check major brand */
3352   data += 4;
3353   if ((profile = q3gp_type_find_get_profile (data))) {
3354     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
3355         "application/x-3gp", "profile", G_TYPE_STRING, profile, NULL);
3356     return;
3357   }
3358 
3359   /* check compatible brands */
3360   if ((data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3361     ftyp_size = GST_READ_UINT32_BE (data);
3362   }
3363   if ((data = gst_type_find_peek (tf, 0, ftyp_size)) != NULL) {
3364     for (offset = 16; offset + 4 < ftyp_size; offset += 4) {
3365       if ((profile = q3gp_type_find_get_profile (data + offset))) {
3366         gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
3367             "application/x-3gp", "profile", G_TYPE_STRING, profile, NULL);
3368         return;
3369       }
3370     }
3371   }
3372 
3373   return;
3374 
3375 }
3376 
3377 /*** video/mj2 and image/jp2 ***/
3378 static GstStaticCaps mj2_caps = GST_STATIC_CAPS ("video/mj2");
3379 
3380 #define MJ2_CAPS gst_static_caps_get(&mj2_caps)
3381 
3382 static GstStaticCaps jp2_caps = GST_STATIC_CAPS ("image/jp2");
3383 
3384 #define JP2_CAPS gst_static_caps_get(&jp2_caps)
3385 
3386 static void
jp2_type_find(GstTypeFind * tf,gpointer unused)3387 jp2_type_find (GstTypeFind * tf, gpointer unused)
3388 {
3389   const guint8 *data;
3390 
3391   data = gst_type_find_peek (tf, 0, 24);
3392   if (!data)
3393     return;
3394 
3395   /* jp2 signature */
3396   if (memcmp (data, "\000\000\000\014jP  \015\012\207\012", 12) != 0)
3397     return;
3398 
3399   /* check ftyp box */
3400   data += 12;
3401   if (memcmp (data + 4, "ftyp", 4) == 0) {
3402     if (memcmp (data + 8, "jp2 ", 4) == 0)
3403       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, JP2_CAPS);
3404     else if (memcmp (data + 8, "mjp2", 4) == 0)
3405       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MJ2_CAPS);
3406   }
3407 }
3408 
3409 
3410 static GstStaticCaps jpc_caps = GST_STATIC_CAPS ("image/x-jpc");
3411 
3412 #define JPC_CAPS gst_static_caps_get(&jpc_caps)
3413 
3414 static void
jpc_type_find(GstTypeFind * tf,gpointer unused)3415 jpc_type_find (GstTypeFind * tf, gpointer unused)
3416 {
3417   gboolean found_cod = FALSE;
3418   gboolean found_qcd = FALSE;
3419   gboolean found_sot = FALSE;
3420   const guint8 *data;
3421   gint offset = 0;
3422   const guint8 soc_siz[] = { 0xff, 0x4f, 0xff, 0x51 };
3423 
3424 #define GST_TYPE_FIND_JPC_MARKER_SOT  0xFF90
3425 #define GST_TYPE_FIND_JPC_MARKER_COD  0xFF52
3426 #define GST_TYPE_FIND_JPC_MARKER_QCD  0xFF5C
3427 #define GST_TYPE_FIND_JPC_MARKER_COC  0xFF53
3428 #define GST_TYPE_FIND_JPC_MARKER_RGN  0xFF5E
3429 #define GST_TYPE_FIND_JPC_MARKER_QCC  0xFF5D
3430 #define GST_TYPE_FIND_JPC_MARKER_POC  0xFF5F
3431 #define GST_TYPE_FIND_JPC_MARKER_PLM  0xFF57
3432 #define GST_TYPE_FIND_JPC_MARKER_PPM  0xFF60
3433 #define GST_TYPE_FIND_JPC_MARKER_TLM  0xFF55
3434 #define GST_TYPE_FIND_JPC_MARKER_CRG  0xFF63
3435 #define GST_TYPE_FIND_JPC_MARKER_COM  0xFF64
3436 #define GST_TYPE_FIND_JPC_MARKER_CBD  0xFF78
3437 #define GST_TYPE_FIND_JPC_MARKER_MCC  0xFF75
3438 #define GST_TYPE_FIND_JPC_MARKER_MCT  0xFF74
3439 #define GST_TYPE_FIND_JPC_MARKER_MCO  0xFF77
3440 
3441 
3442   /* SOC marker + SIZ marker */
3443   if ((data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3444     if (memcmp (data, soc_siz, 4) != 0)
3445       return;
3446     offset += 4;
3447   } else {
3448     return;
3449   }
3450 
3451   while (!found_sot) {
3452 
3453     /* skip actual marker data */
3454     if ((data = gst_type_find_peek (tf, offset, 2)) != NULL) {
3455       offset += GST_READ_UINT16_BE (data);
3456     } else {
3457       return;
3458     }
3459 
3460     /* read marker */
3461     if ((data = gst_type_find_peek (tf, offset, 2)) != NULL) {
3462       guint16 marker = GST_READ_UINT16_BE (data);
3463       switch (marker) {
3464         case GST_TYPE_FIND_JPC_MARKER_SOT:
3465           found_sot = TRUE;
3466           break;
3467         case GST_TYPE_FIND_JPC_MARKER_COD:
3468           found_cod = TRUE;
3469           break;
3470         case GST_TYPE_FIND_JPC_MARKER_QCD:
3471           found_qcd = TRUE;
3472           break;
3473           /* optional header markers */
3474         case GST_TYPE_FIND_JPC_MARKER_COC:
3475         case GST_TYPE_FIND_JPC_MARKER_RGN:
3476         case GST_TYPE_FIND_JPC_MARKER_QCC:
3477         case GST_TYPE_FIND_JPC_MARKER_POC:
3478         case GST_TYPE_FIND_JPC_MARKER_PLM:
3479         case GST_TYPE_FIND_JPC_MARKER_PPM:
3480         case GST_TYPE_FIND_JPC_MARKER_TLM:
3481         case GST_TYPE_FIND_JPC_MARKER_CRG:
3482         case GST_TYPE_FIND_JPC_MARKER_COM:
3483         case GST_TYPE_FIND_JPC_MARKER_CBD:
3484         case GST_TYPE_FIND_JPC_MARKER_MCC:
3485         case GST_TYPE_FIND_JPC_MARKER_MCT:
3486         case GST_TYPE_FIND_JPC_MARKER_MCO:
3487           break;
3488           /* unrecognized marker */
3489         default:
3490           return;
3491       }
3492       offset += 2;
3493     } else {
3494       return;
3495     }
3496   }
3497 
3498   if (found_cod && found_qcd && found_sot)
3499     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, JPC_CAPS);
3500 }
3501 
3502 
3503 /*** video/quicktime ***/
3504 
3505 static GstStaticCaps qt_caps = GST_STATIC_CAPS ("video/quicktime");
3506 
3507 #define QT_CAPS gst_static_caps_get(&qt_caps)
3508 
3509 static gboolean
ftyp_brand_is(const guint8 * brand,const gchar * brands[],gsize n_brands)3510 ftyp_brand_is (const guint8 * brand, const gchar * brands[], gsize n_brands)
3511 {
3512   gsize i;
3513 
3514   for (i = 0; i < n_brands; i++) {
3515     if (memcmp (brand, brands[i], 4) == 0)
3516       return TRUE;
3517   }
3518 
3519   return FALSE;
3520 }
3521 
3522 /* FIXME 0.11: go through http://www.ftyps.com/ */
3523 static void
qt_type_find(GstTypeFind * tf,gpointer unused)3524 qt_type_find (GstTypeFind * tf, gpointer unused)
3525 {
3526   const guint8 *data;
3527   guint tip = 0;
3528   guint atoms_in_a_row = 0;
3529   gboolean have_moov = FALSE, have_mdat = FALSE;
3530   guint64 offset = 0;
3531   guint64 size;
3532   const gchar *variant = NULL;
3533   static const gchar *qt_brands[] = { "qt  " };
3534   static const gchar *cmaf_brands[] = {
3535     "cmf2",
3536     "cmfc",
3537     /* CMAF AVC */
3538     "cfsd",
3539     "cfhd",
3540     "chdf",
3541     /* CMAF AAC */
3542     "caac",
3543     "caaa",
3544     /* CMAF CEA closed captions */
3545     "ccea",
3546     /* CMAF HEVC */
3547     "chhd",
3548     "chh1",
3549     "cud8",
3550     "cud1",
3551     "chd1",
3552     "clg1"
3553         /* TODO: CMAF WebVTT / IMSC1 */
3554   };
3555   static const gchar *iso_brands[] = {
3556     "isom",
3557     "avc1",
3558     "iso3",
3559     "iso4",
3560     "iso5",
3561     "iso6",
3562     "iso7",
3563     "iso8",
3564     "iso9",
3565     "isoa",
3566     "isob",
3567     "mp42",
3568     "mp41"
3569   };
3570   static const gchar *iso_fragmented_brands[] = {
3571     "isml",
3572     "dash",
3573     "hlsf",
3574     "avc3"
3575   };
3576   static const gchar *ccff_brands[] = { "ccff" };
3577   static const gchar *heif_brands[] = { "mif1" };
3578 
3579   while ((data = gst_type_find_peek (tf, offset, 12)) != NULL) {
3580     guint64 new_offset;
3581 
3582     if (memcmp (&data[4], "ftyp", 4) == 0) {
3583       if (ftyp_brand_is (&data[8], qt_brands, G_N_ELEMENTS (qt_brands))) {
3584         tip = GST_TYPE_FIND_MAXIMUM;
3585         break;
3586       }
3587 
3588       if (ftyp_brand_is (&data[8], cmaf_brands, G_N_ELEMENTS (cmaf_brands))) {
3589         tip = GST_TYPE_FIND_MAXIMUM;
3590         variant = "cmaf";
3591         break;
3592       }
3593 
3594       if (ftyp_brand_is (&data[8], iso_brands, G_N_ELEMENTS (iso_brands))) {
3595         tip = GST_TYPE_FIND_MAXIMUM;
3596         variant = "iso";
3597         break;
3598       }
3599 
3600       if (ftyp_brand_is (&data[8], iso_fragmented_brands,
3601               G_N_ELEMENTS (iso_fragmented_brands))) {
3602         tip = GST_TYPE_FIND_MAXIMUM;
3603         variant = "iso-fragmented";
3604         break;
3605       }
3606 
3607       if (ftyp_brand_is (&data[8], ccff_brands, G_N_ELEMENTS (ccff_brands))) {
3608         tip = GST_TYPE_FIND_MAXIMUM;
3609         variant = "ccff";
3610         break;
3611       }
3612 
3613       if (ftyp_brand_is (&data[8], heif_brands, G_N_ELEMENTS (heif_brands))) {
3614         tip = GST_TYPE_FIND_MAXIMUM;
3615         variant = "heif";
3616         break;
3617       }
3618     }
3619 
3620     /* top-level box/atom types that are in common with ISO base media file format */
3621     if (memcmp (&data[4], "moov", 4) == 0 ||
3622         memcmp (&data[4], "mdat", 4) == 0 ||
3623         memcmp (&data[4], "ftyp", 4) == 0 ||
3624         memcmp (&data[4], "free", 4) == 0 ||
3625         memcmp (&data[4], "uuid", 4) == 0 ||
3626         memcmp (&data[4], "udta", 4) == 0 ||
3627         memcmp (&data[4], "styp", 4) == 0 ||
3628         memcmp (&data[4], "sidx", 4) == 0 ||
3629         memcmp (&data[4], "ssix", 4) == 0 ||
3630         memcmp (&data[4], "prft", 4) == 0 ||
3631         memcmp (&data[4], "emsg", 4) == 0 ||
3632         memcmp (&data[4], "moof", 4) == 0 ||
3633         memcmp (&data[4], "mfra", 4) == 0 ||
3634         memcmp (&data[4], "pdin", 4) == 0 ||
3635         memcmp (&data[4], "meta", 4) == 0 ||
3636         memcmp (&data[4], "skip", 4) == 0) {
3637       if (tip == 0) {
3638         tip = GST_TYPE_FIND_LIKELY;
3639       } else {
3640         tip = GST_TYPE_FIND_NEARLY_CERTAIN;
3641       }
3642 
3643       if (memcmp (&data[4], "moov", 4) == 0)
3644         have_moov = TRUE;
3645       if (memcmp (&data[4], "mdat", 4) == 0)
3646         have_mdat = TRUE;
3647 
3648       atoms_in_a_row += 1;
3649       if ((have_moov && have_mdat) || atoms_in_a_row >= 5) {
3650         tip = GST_TYPE_FIND_MAXIMUM;
3651         break;
3652       }
3653     }
3654     /* other box/atom types, apparently quicktime specific */
3655     else if (memcmp (&data[4], "pnot", 4) == 0 ||
3656         memcmp (&data[4], "PICT", 4) == 0 ||
3657         memcmp (&data[4], "wide", 4) == 0 ||
3658         memcmp (&data[4], "prfl", 4) == 0) {
3659       tip = GST_TYPE_FIND_MAXIMUM;
3660       break;
3661     } else {
3662       if (atoms_in_a_row >= 3)
3663         tip = GST_TYPE_FIND_LIKELY;
3664       else
3665         tip = 0;
3666       break;
3667     }
3668 
3669     size = GST_READ_UINT32_BE (data);
3670     if (size + offset >= G_MAXINT64)
3671       break;
3672     /* check compatible brands rather than ever expanding major brands above */
3673     if ((memcmp (&data[4], "ftyp", 4) == 0) && (size >= 16)) {
3674       data = gst_type_find_peek (tf, offset, size);
3675       if (data == NULL)
3676         goto done;
3677       new_offset = 12;
3678       while (new_offset + 4 <= size) {
3679         if (ftyp_brand_is (&data[new_offset], cmaf_brands,
3680                 G_N_ELEMENTS (cmaf_brands))) {
3681           tip = GST_TYPE_FIND_MAXIMUM;
3682           variant = "cmaf";
3683           goto done;
3684         }
3685 
3686         if (ftyp_brand_is (&data[new_offset], iso_brands,
3687                 G_N_ELEMENTS (iso_brands))) {
3688           tip = GST_TYPE_FIND_MAXIMUM;
3689           variant = "iso";
3690           goto done;
3691         }
3692 
3693         if (ftyp_brand_is (&data[new_offset], iso_fragmented_brands,
3694                 G_N_ELEMENTS (iso_fragmented_brands))) {
3695           tip = GST_TYPE_FIND_MAXIMUM;
3696           variant = "iso-fragmented";
3697           goto done;
3698         }
3699 
3700         if (ftyp_brand_is (&data[new_offset], heif_brands,
3701                 G_N_ELEMENTS (heif_brands))) {
3702           tip = GST_TYPE_FIND_MAXIMUM;
3703           variant = "heif";
3704           goto done;
3705         }
3706 
3707         new_offset += 4;
3708       }
3709     }
3710     if (size == 1) {
3711       const guint8 *sizedata;
3712 
3713       sizedata = gst_type_find_peek (tf, offset + 8, 8);
3714       if (sizedata == NULL)
3715         break;
3716 
3717       size = GST_READ_UINT64_BE (sizedata);
3718     } else {
3719       if (size < 8)
3720         break;
3721     }
3722     new_offset = offset + size;
3723     if (new_offset <= offset)
3724       break;
3725     if (new_offset + 16 >= G_MAXINT64)
3726       break;
3727     offset = new_offset;
3728   }
3729 
3730 done:
3731   if (tip > 0) {
3732     if (variant) {
3733       GstCaps *caps = gst_caps_copy (QT_CAPS);
3734 
3735       gst_caps_set_simple (caps, "variant", G_TYPE_STRING, variant, NULL);
3736       gst_type_find_suggest (tf, tip, caps);
3737       gst_caps_unref (caps);
3738     } else {
3739       gst_type_find_suggest (tf, tip, QT_CAPS);
3740     }
3741   }
3742 };
3743 
3744 
3745 /*** image/x-quicktime ***/
3746 
3747 static GstStaticCaps qtif_caps = GST_STATIC_CAPS ("image/x-quicktime");
3748 
3749 #define QTIF_CAPS gst_static_caps_get(&qtif_caps)
3750 
3751 /* how many atoms we check before we give up */
3752 #define QTIF_MAXROUNDS 25
3753 
3754 static void
qtif_type_find(GstTypeFind * tf,gpointer unused)3755 qtif_type_find (GstTypeFind * tf, gpointer unused)
3756 {
3757   const guint8 *data;
3758   gboolean found_idsc = FALSE;
3759   gboolean found_idat = FALSE;
3760   guint64 offset = 0;
3761   guint rounds = 0;
3762 
3763   while ((data = gst_type_find_peek (tf, offset, 8)) != NULL) {
3764     guint64 size;
3765 
3766     size = GST_READ_UINT32_BE (data);
3767     if (size == 1) {
3768       const guint8 *sizedata;
3769 
3770       sizedata = gst_type_find_peek (tf, offset + 8, 8);
3771       if (sizedata == NULL)
3772         break;
3773 
3774       size = GST_READ_UINT64_BE (sizedata);
3775     }
3776     if (size < 8)
3777       break;
3778 
3779     if (memcmp (data + 4, "idsc", 4) == 0)
3780       found_idsc = TRUE;
3781     if (memcmp (data + 4, "idat", 4) == 0)
3782       found_idat = TRUE;
3783 
3784     if (found_idsc && found_idat) {
3785       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, QTIF_CAPS);
3786       return;
3787     }
3788 
3789     offset += size;
3790     if (offset + 8 >= G_MAXINT64)
3791       break;
3792     if (++rounds > QTIF_MAXROUNDS)
3793       break;
3794   }
3795 
3796   if (found_idsc || found_idat) {
3797     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, QTIF_CAPS);
3798     return;
3799   }
3800 };
3801 
3802 /*** audio/x-mod ***/
3803 
3804 static GstStaticCaps mod_caps = GST_STATIC_CAPS ("audio/x-mod");
3805 
3806 #define MOD_CAPS gst_static_caps_get(&mod_caps)
3807 /* FIXME: M15 CheckType to do */
3808 static void
mod_type_find(GstTypeFind * tf,gpointer unused)3809 mod_type_find (GstTypeFind * tf, gpointer unused)
3810 {
3811   const guint8 *data;
3812   GstTypeFindProbability probability;
3813   const char *mod_type = NULL;
3814 
3815   /* MOD */
3816   if ((data = gst_type_find_peek (tf, 1080, 4)) != NULL) {
3817     /* Protracker and variants */
3818     if ((memcmp (data, "M.K.", 4) == 0) ||
3819         (memcmp (data, "M!K!", 4) == 0) ||
3820         (memcmp (data, "M&K!", 4) == 0) || (memcmp (data, "N.T.", 4) == 0) ||
3821         /* Star Tracker */
3822         (memcmp (data, "FLT", 3) == 0 && isdigit (data[3])) ||
3823         (memcmp (data, "EXO", 3) == 0 && isdigit (data[3])) ||
3824         /* Oktalyzer (Amiga) */
3825         (memcmp (data, "OKTA", 4) == 0) || (memcmp (data, "OCTA", 4) == 0) ||
3826         /* Oktalyser (Atari) */
3827         (memcmp (data, "CD81", 4) == 0) ||
3828         /* Taketracker */
3829         (memcmp (data, "TDZ", 3) == 0 && isdigit (data[3])) ||
3830         /* Fasttracker */
3831         (memcmp (data + 1, "CHN", 3) == 0 && isdigit (data[0])) ||
3832         /* Fasttracker or Taketracker */
3833         (memcmp (data + 2, "CH", 2) == 0 && isdigit (data[0])
3834             && isdigit (data[1])) || (memcmp (data + 2, "CN", 2) == 0
3835             && isdigit (data[0]) && isdigit (data[1]))) {
3836       mod_type = "mod";
3837       probability = GST_TYPE_FIND_MAXIMUM;
3838       goto suggest_audio_mod_caps;
3839     }
3840   }
3841   /* J2B (Jazz Jackrabbit 2) */
3842   if ((data = gst_type_find_peek (tf, 0, 8)) != NULL) {
3843     if ((memcmp (data, "MUSE\xDE\xAD", 4) == 0) &&
3844         ((memcmp (data + 6, "\xBE\xEF", 2) == 0) ||
3845             (memcmp (data + 6, "\xBA\xBE", 2) == 0))) {
3846       mod_type = "j2b";
3847       probability = GST_TYPE_FIND_MAXIMUM;
3848       goto suggest_audio_mod_caps;
3849     }
3850   }
3851   /* AMS (Velvet Studio) */
3852   if ((data = gst_type_find_peek (tf, 0, 7)) != NULL) {
3853     if (memcmp (data, "AMShdr\x1A", 7) == 0) {
3854       mod_type = "velvet-ams";
3855       probability = GST_TYPE_FIND_MAXIMUM;
3856       goto suggest_audio_mod_caps;
3857     }
3858   }
3859   /* AMS (Extreme Tracker) */
3860   if ((data = gst_type_find_peek (tf, 0, 9)) != NULL) {
3861     if ((memcmp (data, "Extreme", 7) == 0) && (data[8] == 1)) {
3862       mod_type = "extreme-ams";
3863       probability = GST_TYPE_FIND_LIKELY;
3864       goto suggest_audio_mod_caps;
3865     }
3866   }
3867   /* ULT (Ultratracker) */
3868   if ((data = gst_type_find_peek (tf, 0, 14)) != NULL) {
3869     if (memcmp (data, "MAS_UTrack_V00", 14) == 0) {
3870       mod_type = "ult";
3871       probability = GST_TYPE_FIND_MAXIMUM;
3872       goto suggest_audio_mod_caps;
3873     }
3874   }
3875   /* DIGI (DigiBooster) */
3876   if ((data = gst_type_find_peek (tf, 0, 20)) != NULL) {
3877     if (memcmp (data, "DIGI Booster module\0", 20) == 0) {
3878       mod_type = "digi";
3879       probability = GST_TYPE_FIND_MAXIMUM;
3880       goto suggest_audio_mod_caps;
3881     }
3882   }
3883   /* PTM (PolyTracker) */
3884   if ((data = gst_type_find_peek (tf, 0x2C, 4)) != NULL) {
3885     if (memcmp (data, "PTMF", 4) == 0) {
3886       mod_type = "ptm";
3887       probability = GST_TYPE_FIND_LIKELY;
3888       goto suggest_audio_mod_caps;
3889     }
3890   }
3891   /* XM */
3892   if ((data = gst_type_find_peek (tf, 0, 38)) != NULL) {
3893     if ((memcmp (data, "Extended Module: ", 17) == 0) && (data[37] == 0x1A)) {
3894       mod_type = "xm";
3895       probability = GST_TYPE_FIND_MAXIMUM;
3896       goto suggest_audio_mod_caps;
3897     }
3898   }
3899   /* OKT */
3900   if (data || (data = gst_type_find_peek (tf, 0, 8)) != NULL) {
3901     if (memcmp (data, "OKTASONG", 8) == 0) {
3902       mod_type = "okt";
3903       probability = GST_TYPE_FIND_MAXIMUM;
3904       goto suggest_audio_mod_caps;
3905     }
3906   }
3907   /* Various formats with a 4-byte magic ID at the beginning of the file */
3908   if (data || (data = gst_type_find_peek (tf, 0, 4)) != NULL) {
3909     /* PSM (Protracker Studio PSM) */
3910     if (memcmp (data, "PSM", 3) == 0) {
3911       unsigned char fbyte = data[3];
3912       if ((fbyte == ' ') || (fbyte == 254)) {
3913         mod_type = "psm";
3914         probability = GST_TYPE_FIND_MAXIMUM;
3915         goto suggest_audio_mod_caps;
3916       }
3917     }
3918     /* 669 */
3919     if ((memcmp (data, "if", 2) == 0) || (memcmp (data, "JN", 2) == 0)) {
3920       mod_type = "669";
3921       probability = GST_TYPE_FIND_LIKELY;
3922       goto suggest_audio_mod_caps;
3923     }
3924     /* AMF */
3925     if ((memcmp (data, "AMF", 3) == 0) && (data[3] > 10) && (data[3] < 14)) {
3926       mod_type = "dsmi-amf";
3927       probability = GST_TYPE_FIND_MAXIMUM;
3928       goto suggest_audio_mod_caps;
3929     }
3930     /* IT */
3931     if (memcmp (data, "IMPM", 4) == 0) {
3932       mod_type = "it";
3933       probability = GST_TYPE_FIND_MAXIMUM;
3934       goto suggest_audio_mod_caps;
3935     }
3936     /* DBM (DigiBooster Pro) */
3937     if (memcmp (data, "DBM0", 4) == 0) {
3938       mod_type = "dbm";
3939       probability = GST_TYPE_FIND_MAXIMUM;
3940       goto suggest_audio_mod_caps;
3941     }
3942     /* MDL (DigiTrakker) */
3943     if (memcmp (data, "DMDL", 4) == 0) {
3944       mod_type = "mdl";
3945       probability = GST_TYPE_FIND_MAXIMUM;
3946       goto suggest_audio_mod_caps;
3947     }
3948     /* MT2 (MadTracker 2.0) */
3949     if (memcmp (data, "MT20", 4) == 0) {
3950       mod_type = "mt2";
3951       probability = GST_TYPE_FIND_MAXIMUM;
3952       goto suggest_audio_mod_caps;
3953     }
3954     /* DMF (X-Tracker) */
3955     if (memcmp (data, "DDMF", 4) == 0) {
3956       mod_type = "dmf";
3957       probability = GST_TYPE_FIND_MAXIMUM;
3958       goto suggest_audio_mod_caps;
3959     }
3960     /* MED */
3961     if ((memcmp (data, "MMD0", 4) == 0) || (memcmp (data, "MMD1", 4) == 0)) {
3962       mod_type = "med";
3963       probability = GST_TYPE_FIND_MAXIMUM;
3964       goto suggest_audio_mod_caps;
3965     }
3966     /* MTM */
3967     if (memcmp (data, "MTM", 3) == 0) {
3968       mod_type = "mtm";
3969       probability = GST_TYPE_FIND_MAXIMUM;
3970       goto suggest_audio_mod_caps;
3971     }
3972     /* DSM */
3973     if (memcmp (data, "RIFF", 4) == 0) {
3974       const guint8 *data2 = gst_type_find_peek (tf, 8, 4);
3975 
3976       if (data2) {
3977         if (memcmp (data2, "DSMF", 4) == 0) {
3978           mod_type = "dsm";
3979           probability = GST_TYPE_FIND_MAXIMUM;
3980           goto suggest_audio_mod_caps;
3981         }
3982       }
3983     }
3984     /* FAR (Farandole) */
3985     if (memcmp (data, "FAR\xFE", 4) == 0) {
3986       mod_type = "far";
3987       probability = GST_TYPE_FIND_MAXIMUM;
3988       goto suggest_audio_mod_caps;
3989     }
3990     /* FAM */
3991     if (memcmp (data, "FAM\xFE", 4) == 0) {
3992       const guint8 *data2 = gst_type_find_peek (tf, 44, 3);
3993 
3994       if (data2) {
3995         if (memcmp (data2, "compare", 3) == 0) {
3996           mod_type = "fam";
3997           probability = GST_TYPE_FIND_MAXIMUM;
3998           goto suggest_audio_mod_caps;
3999         }
4000         /* otherwise do not suggest anything */
4001       } else {
4002         mod_type = "fam";
4003         probability = GST_TYPE_FIND_LIKELY;
4004         goto suggest_audio_mod_caps;
4005       }
4006     }
4007     /* GDM */
4008     if (memcmp (data, "GDM\xFE", 4) == 0) {
4009       const guint8 *data2 = gst_type_find_peek (tf, 71, 4);
4010 
4011       if (data2) {
4012         if (memcmp (data2, "GMFS", 4) == 0) {
4013           mod_type = "gdm";
4014           probability = GST_TYPE_FIND_MAXIMUM;
4015           goto suggest_audio_mod_caps;
4016         }
4017         /* otherwise do not suggest anything */
4018       } else {
4019         mod_type = "gdm";
4020         probability = GST_TYPE_FIND_LIKELY;
4021         goto suggest_audio_mod_caps;
4022       }
4023     }
4024     /* UMX */
4025     if (memcmp (data, "\xC1\x83\x2A\x9E", 4) == 0) {
4026       mod_type = "umx";
4027       probability = GST_TYPE_FIND_POSSIBLE;
4028       goto suggest_audio_mod_caps;
4029     }
4030   }
4031   /* FAR (Farandole) (secondary detection) */
4032   if ((data = gst_type_find_peek (tf, 44, 3)) != NULL) {
4033     if (memcmp (data, "\x0D\x0A\x1A", 3) == 0) {
4034       mod_type = "far";
4035       probability = GST_TYPE_FIND_POSSIBLE;
4036       goto suggest_audio_mod_caps;
4037     }
4038   }
4039   /* IMF */
4040   if ((data = gst_type_find_peek (tf, 60, 4)) != NULL) {
4041     if (memcmp (data, "IM10", 4) == 0) {
4042       mod_type = "imf";
4043       probability = GST_TYPE_FIND_MAXIMUM;
4044       goto suggest_audio_mod_caps;
4045     }
4046   }
4047   /* S3M */
4048   if ((data = gst_type_find_peek (tf, 44, 4)) != NULL) {
4049     if (memcmp (data, "SCRM", 4) == 0) {
4050       mod_type = "s3m";
4051       probability = GST_TYPE_FIND_MAXIMUM;
4052       goto suggest_audio_mod_caps;
4053     }
4054   }
4055   /* STM */
4056   if ((data = gst_type_find_peek (tf, 20, 8)) != NULL) {
4057     if (g_ascii_strncasecmp ((gchar *) data, "!Scream!", 8) == 0 ||
4058         g_ascii_strncasecmp ((gchar *) data, "BMOD2STM", 8) == 0) {
4059       const guint8 *id, *stmtype;
4060 
4061       if ((id = gst_type_find_peek (tf, 28, 1)) == NULL)
4062         return;
4063       if ((stmtype = gst_type_find_peek (tf, 29, 1)) == NULL)
4064         return;
4065       if (*id == 0x1A && *stmtype == 2) {
4066         mod_type = "stm";
4067         probability = GST_TYPE_FIND_MAXIMUM;
4068         goto suggest_audio_mod_caps;
4069       }
4070     }
4071   }
4072   /* AMF */
4073   if ((data = gst_type_find_peek (tf, 0, 19)) != NULL) {
4074     if (memcmp (data, "ASYLUM Music Format", 19) == 0) {
4075       mod_type = "asylum-amf";
4076       probability = GST_TYPE_FIND_MAXIMUM;
4077       goto suggest_audio_mod_caps;
4078     }
4079   }
4080 
4081 suggest_audio_mod_caps:
4082   if (mod_type != NULL) {
4083     GstCaps *caps = gst_caps_new_simple ("audio/x-mod",
4084         "type", G_TYPE_STRING, mod_type, NULL);
4085 
4086     gst_type_find_suggest (tf, probability, caps);
4087     gst_caps_unref (caps);
4088   }
4089 }
4090 
4091 /*** application/x-shockwave-flash ***/
4092 
4093 static GstStaticCaps swf_caps =
4094 GST_STATIC_CAPS ("application/x-shockwave-flash");
4095 #define SWF_CAPS (gst_static_caps_get(&swf_caps))
4096 static void
swf_type_find(GstTypeFind * tf,gpointer unused)4097 swf_type_find (GstTypeFind * tf, gpointer unused)
4098 {
4099   const guint8 *data = gst_type_find_peek (tf, 0, 4);
4100 
4101   if (data && (data[0] == 'F' || data[0] == 'C') &&
4102       data[1] == 'W' && data[2] == 'S') {
4103     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SWF_CAPS);
4104   }
4105 }
4106 
4107 /*** application/vnd.ms-sstr+xml ***/
4108 
4109 static void
mss_manifest_load_utf16(gunichar2 * utf16_ne,const guint8 * utf16_data,gsize data_size,guint data_endianness)4110 mss_manifest_load_utf16 (gunichar2 * utf16_ne, const guint8 * utf16_data,
4111     gsize data_size, guint data_endianness)
4112 {
4113   memcpy (utf16_ne, utf16_data, data_size);
4114   if (data_endianness != G_BYTE_ORDER) {
4115     guint i;
4116 
4117     for (i = 0; i < data_size / 2; ++i)
4118       utf16_ne[i] = GUINT16_SWAP_LE_BE (utf16_ne[i]);
4119   }
4120 }
4121 
4122 static GstStaticCaps mss_manifest_caps =
4123 GST_STATIC_CAPS ("application/vnd.ms-sstr+xml");
4124 #define MSS_MANIFEST_CAPS (gst_static_caps_get(&mss_manifest_caps))
4125 static void
mss_manifest_type_find(GstTypeFind * tf,gpointer unused)4126 mss_manifest_type_find (GstTypeFind * tf, gpointer unused)
4127 {
4128   gunichar2 utf16_ne[512];
4129   const guint8 *data;
4130   guint data_endianness = 0;
4131   glong n_read = 0, size = 0;
4132   guint length;
4133   gchar *utf8;
4134   gboolean utf8_bom_detected = FALSE;
4135 
4136   if (xml_check_first_element (tf, "SmoothStreamingMedia", 20, TRUE)) {
4137     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
4138     return;
4139   }
4140 
4141   length = gst_type_find_get_length (tf);
4142 
4143   /* try detecting the charset */
4144   data = gst_type_find_peek (tf, 0, 3);
4145 
4146   if (data == NULL)
4147     return;
4148 
4149   /* look for a possible BOM */
4150   if (data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
4151     utf8_bom_detected = TRUE;
4152   else if (data[0] == 0xFF && data[1] == 0xFE)
4153     data_endianness = G_LITTLE_ENDIAN;
4154   else if (data[0] == 0xFE && data[1] == 0xFF)
4155     data_endianness = G_BIG_ENDIAN;
4156   else
4157     return;
4158 
4159   /* try a default that should be enough */
4160   if (length == 0)
4161     length = 512;
4162   else if (length < 64)
4163     return;
4164   else                          /* the first few bytes should be enough */
4165     length = MIN (1024, length);
4166 
4167   data = gst_type_find_peek (tf, 0, length);
4168 
4169   if (data == NULL)
4170     return;
4171 
4172   /* skip the BOM */
4173   data += 2;
4174   length -= 2;
4175 
4176   if (utf8_bom_detected) {
4177     /* skip last byte of the BOM */
4178     data++;
4179     length--;
4180 
4181     if (xml_check_first_element_from_data (data, length,
4182             "SmoothStreamingMedia", 20, TRUE))
4183       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
4184   } else {
4185     length = GST_ROUND_DOWN_2 (length);
4186 
4187     /* convert to native endian UTF-16 */
4188     mss_manifest_load_utf16 (utf16_ne, data, length, data_endianness);
4189 
4190     /* and now convert to UTF-8 */
4191     utf8 = g_utf16_to_utf8 (utf16_ne, length / 2, &n_read, &size, NULL);
4192     if (utf8 != NULL && n_read > 0) {
4193       if (xml_check_first_element_from_data ((const guint8 *) utf8, size,
4194               "SmoothStreamingMedia", 20, TRUE))
4195         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MSS_MANIFEST_CAPS);
4196     }
4197     g_free (utf8);
4198   }
4199 }
4200 
4201 /*** image/jpeg ***/
4202 
4203 #define JPEG_MARKER_IS_START_OF_FRAME(x) \
4204     ((x)>=0xc0 && (x) <= 0xcf && (x)!=0xc4 && (x)!=0xc8 && (x)!=0xcc)
4205 
4206 static GstStaticCaps jpeg_caps = GST_STATIC_CAPS ("image/jpeg");
4207 
4208 #define JPEG_CAPS (gst_static_caps_get(&jpeg_caps))
4209 static void
jpeg_type_find(GstTypeFind * tf,gpointer unused)4210 jpeg_type_find (GstTypeFind * tf, gpointer unused)
4211 {
4212   GstTypeFindProbability prob = GST_TYPE_FIND_POSSIBLE;
4213   DataScanCtx c = { 0, NULL, 0 };
4214   GstCaps *caps;
4215   guint num_markers;
4216 
4217   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 2)))
4218     return;
4219 
4220   if (c.data[0] != 0xff || c.data[1] != 0xd8)
4221     return;
4222 
4223   num_markers = 1;
4224   data_scan_ctx_advance (tf, &c, 2);
4225 
4226   caps = gst_caps_copy (JPEG_CAPS);
4227 
4228   while (data_scan_ctx_ensure_data (tf, &c, 4) && c.offset < (200 * 1024)) {
4229     guint16 len;
4230     guint8 marker;
4231 
4232     if (c.data[0] != 0xff)
4233       break;
4234 
4235     marker = c.data[1];
4236     if (G_UNLIKELY (marker == 0xff)) {
4237       data_scan_ctx_advance (tf, &c, 1);
4238       continue;
4239     }
4240 
4241     data_scan_ctx_advance (tf, &c, 2);
4242 
4243     /* we assume all markers we'll see before SOF have a payload length; if
4244      * that's not the case we'll just detect a false sync and bail out, but
4245      * still report POSSIBLE probability */
4246     len = GST_READ_UINT16_BE (c.data);
4247 
4248     GST_LOG ("possible JPEG marker 0x%02x (@0x%04x), segment length %u",
4249         marker, (guint) c.offset, len);
4250 
4251     if (!data_scan_ctx_ensure_data (tf, &c, len))
4252       break;
4253 
4254     if (marker == 0xc4 ||       /* DEFINE_HUFFMAN_TABLES          */
4255         marker == 0xcc ||       /* DEFINE_ARITHMETIC_CONDITIONING */
4256         marker == 0xdb ||       /* DEFINE_QUANTIZATION_TABLES     */
4257         marker == 0xdd ||       /* DEFINE_RESTART_INTERVAL        */
4258         marker == 0xfe) {       /* COMMENT                        */
4259       data_scan_ctx_advance (tf, &c, len);
4260       ++num_markers;
4261     } else if (marker == 0xe0 && len >= (2 + 4) &&      /* APP0 */
4262         data_scan_ctx_memcmp (tf, &c, 2, "JFIF", 4)) {
4263       GST_LOG ("found JFIF tag");
4264       prob = GST_TYPE_FIND_MAXIMUM;
4265       data_scan_ctx_advance (tf, &c, len);
4266       ++num_markers;
4267       /* we continue until we find a start of frame marker */
4268     } else if (marker == 0xe1 && len >= (2 + 4) &&      /* APP1 */
4269         data_scan_ctx_memcmp (tf, &c, 2, "Exif", 4)) {
4270       GST_LOG ("found Exif tag");
4271       prob = GST_TYPE_FIND_MAXIMUM;
4272       data_scan_ctx_advance (tf, &c, len);
4273       ++num_markers;
4274       /* we continue until we find a start of frame marker */
4275     } else if (marker >= 0xe0 && marker <= 0xef) {      /* APPn */
4276       data_scan_ctx_advance (tf, &c, len);
4277       ++num_markers;
4278     } else if (JPEG_MARKER_IS_START_OF_FRAME (marker) && len >= (2 + 8)) {
4279       int h, w;
4280 
4281       h = GST_READ_UINT16_BE (c.data + 2 + 1);
4282       w = GST_READ_UINT16_BE (c.data + 2 + 1 + 2);
4283       if (h == 0 || w == 0) {
4284         GST_WARNING ("bad width %u and/or height %u in SOF header", w, h);
4285         break;
4286       }
4287 
4288       GST_LOG ("SOF at offset %" G_GUINT64_FORMAT ", num_markers=%d, "
4289           "WxH=%dx%d", c.offset - 2, num_markers, w, h);
4290 
4291       if (num_markers >= 5 || prob == GST_TYPE_FIND_MAXIMUM)
4292         prob = GST_TYPE_FIND_MAXIMUM;
4293       else
4294         prob = GST_TYPE_FIND_LIKELY;
4295 
4296       gst_caps_set_simple (caps, "width", G_TYPE_INT, w,
4297           "height", G_TYPE_INT, h, "sof-marker", G_TYPE_INT, marker & 0xf,
4298           NULL);
4299 
4300       break;
4301     } else {
4302       GST_WARNING ("bad length or unexpected JPEG marker 0xff 0x%02x", marker);
4303       break;
4304     }
4305   }
4306 
4307   gst_type_find_suggest (tf, prob, caps);
4308   gst_caps_unref (caps);
4309 }
4310 
4311 /*** image/bmp ***/
4312 
4313 static GstStaticCaps bmp_caps = GST_STATIC_CAPS ("image/bmp");
4314 
4315 #define BMP_CAPS (gst_static_caps_get(&bmp_caps))
4316 static void
bmp_type_find(GstTypeFind * tf,gpointer unused)4317 bmp_type_find (GstTypeFind * tf, gpointer unused)
4318 {
4319   DataScanCtx c = { 0, NULL, 0 };
4320   guint32 struct_size, w, h, planes, bpp;
4321 
4322   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 54)))
4323     return;
4324 
4325   if (c.data[0] != 'B' || c.data[1] != 'M')
4326     return;
4327 
4328   /* skip marker + size */
4329   data_scan_ctx_advance (tf, &c, 2 + 4);
4330 
4331   /* reserved, must be 0 */
4332   if (c.data[0] != 0 || c.data[1] != 0 || c.data[2] != 0 || c.data[3] != 0)
4333     return;
4334 
4335   data_scan_ctx_advance (tf, &c, 2 + 2);
4336 
4337   /* offset to start of image data in bytes (check for sanity) */
4338   GST_LOG ("offset=%u", GST_READ_UINT32_LE (c.data));
4339   if (GST_READ_UINT32_LE (c.data) > (10 * 1024 * 1024))
4340     return;
4341 
4342   struct_size = GST_READ_UINT32_LE (c.data + 4);
4343   GST_LOG ("struct_size=%u", struct_size);
4344 
4345   data_scan_ctx_advance (tf, &c, 4 + 4);
4346 
4347   if (struct_size == 0x0C) {
4348     w = GST_READ_UINT16_LE (c.data);
4349     h = GST_READ_UINT16_LE (c.data + 2);
4350     planes = GST_READ_UINT16_LE (c.data + 2 + 2);
4351     bpp = GST_READ_UINT16_LE (c.data + 2 + 2 + 2);
4352   } else if (struct_size == 40 || struct_size == 64 || struct_size == 108
4353       || struct_size == 124 || struct_size == 0xF0) {
4354     w = GST_READ_UINT32_LE (c.data);
4355     h = GST_READ_UINT32_LE (c.data + 4);
4356     planes = GST_READ_UINT16_LE (c.data + 4 + 4);
4357     bpp = GST_READ_UINT16_LE (c.data + 4 + 4 + 2);
4358   } else {
4359     return;
4360   }
4361 
4362   /* image sizes sanity check */
4363   GST_LOG ("w=%u, h=%u, planes=%u, bpp=%u", w, h, planes, bpp);
4364   if (w == 0 || w > 0xfffff || h == 0 || h > 0xfffff || planes != 1 ||
4365       (bpp != 1 && bpp != 4 && bpp != 8 && bpp != 16 && bpp != 24 && bpp != 32))
4366     return;
4367 
4368   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, "image/bmp",
4369       "width", G_TYPE_INT, w, "height", G_TYPE_INT, h, "bpp", G_TYPE_INT, bpp,
4370       NULL);
4371 }
4372 
4373 /*** image/tiff ***/
4374 static GstStaticCaps tiff_caps = GST_STATIC_CAPS ("image/tiff, "
4375     "endianness = (int) { BIG_ENDIAN, LITTLE_ENDIAN }");
4376 #define TIFF_CAPS (gst_static_caps_get(&tiff_caps))
4377 static GstStaticCaps tiff_be_caps = GST_STATIC_CAPS ("image/tiff, "
4378     "endianness = (int) BIG_ENDIAN");
4379 #define TIFF_BE_CAPS (gst_static_caps_get(&tiff_be_caps))
4380 static GstStaticCaps tiff_le_caps = GST_STATIC_CAPS ("image/tiff, "
4381     "endianness = (int) LITTLE_ENDIAN");
4382 #define TIFF_LE_CAPS (gst_static_caps_get(&tiff_le_caps))
4383 static void
tiff_type_find(GstTypeFind * tf,gpointer unused)4384 tiff_type_find (GstTypeFind * tf, gpointer unused)
4385 {
4386   const guint8 *data = gst_type_find_peek (tf, 0, 8);
4387   guint8 le_header[4] = { 0x49, 0x49, 0x2A, 0x00 };
4388   guint8 be_header[4] = { 0x4D, 0x4D, 0x00, 0x2A };
4389 
4390   if (data) {
4391     if (memcmp (data, le_header, 4) == 0) {
4392       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TIFF_LE_CAPS);
4393     } else if (memcmp (data, be_header, 4) == 0) {
4394       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TIFF_BE_CAPS);
4395     }
4396   }
4397 }
4398 
4399 /*** image/x-exr ***/
4400 static GstStaticCaps exr_caps = GST_STATIC_CAPS ("image/x-exr");
4401 #define EXR_CAPS (gst_static_caps_get(&exr_caps))
4402 static void
exr_type_find(GstTypeFind * tf,gpointer unused)4403 exr_type_find (GstTypeFind * tf, gpointer unused)
4404 {
4405   const guint8 *data = gst_type_find_peek (tf, 0, 8);
4406 
4407   if (data) {
4408     guint32 flags;
4409 
4410     if (GST_READ_UINT32_LE (data) != 0x01312f76)
4411       return;
4412 
4413     flags = GST_READ_UINT32_LE (data + 4);
4414     if ((flags & 0xff) != 1 && (flags & 0xff) != 2)
4415       return;
4416 
4417     /* If bit 9 is set, bit 11 and 12 must be 0 */
4418     if ((flags & 0x200) && (flags & 0x1800))
4419       return;
4420 
4421     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, EXR_CAPS);
4422   }
4423 }
4424 
4425 
4426 /*** PNM ***/
4427 
4428 static GstStaticCaps pnm_caps = GST_STATIC_CAPS ("image/x-portable-bitmap; "
4429     "image/x-portable-graymap; image/x-portable-pixmap; "
4430     "image/x-portable-anymap");
4431 
4432 #define PNM_CAPS (gst_static_caps_get(&pnm_caps))
4433 
/* TRUE if c is one of the whitespace characters accepted between PNM header
 * tokens: space, carriage return, line feed or TAB ('\t', not the letter
 * 't').  Evaluates its argument more than once. */
#define IS_PNM_WHITESPACE(c) \
    ((c) == ' ' || (c) == '\r' || (c) == '\n' || (c) == '\t')
4436 
4437 static void
pnm_type_find(GstTypeFind * tf,gpointer unused)4438 pnm_type_find (GstTypeFind * tf, gpointer unused)
4439 {
4440   const gchar *media_type = NULL;
4441   DataScanCtx c = { 0, NULL, 0 };
4442   guint h = 0, w = 0;
4443 
4444   if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 16)))
4445     return;
4446 
4447   /* see http://en.wikipedia.org/wiki/Netpbm_format */
4448   if (c.data[0] != 'P' || c.data[1] < '1' || c.data[1] > '7' ||
4449       !IS_PNM_WHITESPACE (c.data[2]) ||
4450       (c.data[3] != '#' && c.data[3] < '0' && c.data[3] > '9'))
4451     return;
4452 
4453   switch (c.data[1]) {
4454     case '1':
4455       media_type = "image/x-portable-bitmap";   /* ASCII */
4456       break;
4457     case '2':
4458       media_type = "image/x-portable-graymap";  /* ASCII */
4459       break;
4460     case '3':
4461       media_type = "image/x-portable-pixmap";   /* ASCII */
4462       break;
4463     case '4':
4464       media_type = "image/x-portable-bitmap";   /* Raw */
4465       break;
4466     case '5':
4467       media_type = "image/x-portable-graymap";  /* Raw */
4468       break;
4469     case '6':
4470       media_type = "image/x-portable-pixmap";   /* Raw */
4471       break;
4472     case '7':
4473       media_type = "image/x-portable-anymap";
4474       break;
4475     default:
4476       g_return_if_reached ();
4477   }
4478 
4479   /* try to extract width and height as well */
4480   if (c.data[1] != '7') {
4481     gchar s[64] = { 0, }
4482     , sep1, sep2;
4483 
4484     /* need to skip any comment lines first */
4485     data_scan_ctx_advance (tf, &c, 3);
4486 
4487     if (!data_scan_ctx_ensure_data (tf, &c, 1))
4488       return;
4489 
4490     while (c.data[0] == '#') {  /* we know there's still data left */
4491       data_scan_ctx_advance (tf, &c, 1);
4492       if (!data_scan_ctx_ensure_data (tf, &c, 1))
4493         return;
4494 
4495       while (c.data[0] != '\n' && c.data[0] != '\r') {
4496         data_scan_ctx_advance (tf, &c, 1);
4497         if (!data_scan_ctx_ensure_data (tf, &c, 1))
4498           return;
4499       }
4500       data_scan_ctx_advance (tf, &c, 1);
4501       GST_LOG ("skipped comment line in PNM header");
4502       if (!data_scan_ctx_ensure_data (tf, &c, 1))
4503         return;
4504     }
4505 
4506     if (!data_scan_ctx_ensure_data (tf, &c, 32) &&
4507         !data_scan_ctx_ensure_data (tf, &c, 4)) {
4508       return;
4509     }
4510 
4511     /* need to NUL-terminate data for sscanf */
4512     memcpy (s, c.data, MIN (sizeof (s) - 1, c.size));
4513     if (sscanf (s, "%u%c%u%c", &w, &sep1, &h, &sep2) == 4 &&
4514         IS_PNM_WHITESPACE (sep1) && IS_PNM_WHITESPACE (sep2) &&
4515         w > 0 && w < G_MAXINT && h > 0 && h < G_MAXINT) {
4516       GST_LOG ("extracted PNM width and height: %dx%d", w, h);
4517     } else {
4518       w = 0;
4519       h = 0;
4520     }
4521   } else {
4522     /* FIXME: extract width + height for anymaps too */
4523   }
4524 
4525   if (w > 0 && h > 0) {
4526     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, media_type,
4527         "width", G_TYPE_INT, w, "height", G_TYPE_INT, h, NULL);
4528   } else {
4529     gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_LIKELY, media_type);
4530   }
4531 }
4532 
4533 static GstStaticCaps sds_caps = GST_STATIC_CAPS ("audio/x-sds");
4534 
4535 #define SDS_CAPS (gst_static_caps_get(&sds_caps))
4536 static void
sds_type_find(GstTypeFind * tf,gpointer unused)4537 sds_type_find (GstTypeFind * tf, gpointer unused)
4538 {
4539   const guint8 *data = gst_type_find_peek (tf, 0, 4);
4540   guint8 mask[4] = { 0xFF, 0xFF, 0x80, 0xFF };
4541   guint8 match[4] = { 0xF0, 0x7E, 0, 0x01 };
4542   gint x;
4543 
4544   if (data) {
4545     for (x = 0; x < 4; x++) {
4546       if ((data[x] & mask[x]) != match[x]) {
4547         return;
4548       }
4549     }
4550     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SDS_CAPS);
4551   }
4552 }
4553 
4554 static GstStaticCaps ircam_caps = GST_STATIC_CAPS ("audio/x-ircam");
4555 
4556 #define IRCAM_CAPS (gst_static_caps_get(&ircam_caps))
4557 static void
ircam_type_find(GstTypeFind * tf,gpointer unused)4558 ircam_type_find (GstTypeFind * tf, gpointer unused)
4559 {
4560   const guint8 *data = gst_type_find_peek (tf, 0, 4);
4561   guint8 mask[4] = { 0xFF, 0xFF, 0xF8, 0xFF };
4562   guint8 match[4] = { 0x64, 0xA3, 0x00, 0x00 };
4563   gint x;
4564   gboolean matched = TRUE;
4565 
4566   if (!data) {
4567     return;
4568   }
4569   for (x = 0; x < 4; x++) {
4570     if ((data[x] & mask[x]) != match[x]) {
4571       matched = FALSE;
4572     }
4573   }
4574   if (matched) {
4575     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, IRCAM_CAPS);
4576     return;
4577   }
4578   /* now try the reverse version */
4579   matched = TRUE;
4580   for (x = 0; x < 4; x++) {
4581     if ((data[x] & mask[3 - x]) != match[3 - x]) {
4582       matched = FALSE;
4583     }
4584   }
4585 }
4586 
4587 /*** Matroska/WebM ***/
4588 
/* Element IDs the Matroska/WebM typefinder looks for.  The values are the raw
 * ID bytes as read from the stream, i.e. they still include the length-marker
 * bits of the first byte, which is how ebml_read_chunk_header() returns them. */
#define EBML_HEADER           0x1A45DFA3
#define EBML_VERSION          0x4286
#define EBML_DOCTYPE          0x4282
#define EBML_DOCTYPE_VERSION  0x4287
#define MATROSKA_SEGMENT      0x18538067
#define MATROSKA_CLUSTER      0x1F43B675
#define MATROSKA_TRACKS       0x1654AE6B
#define MATROSKA_TRACK_ENTRY  0xAE
#define MATROSKA_TRACK_TYPE   0x83
#define MATROSKA_STEREO_MODE  0x53B8

/* Scanning stops once the read offset has passed this many bytes (2 MiB). */
#define EBML_MAX_LEN (2 * 1024 * 1024)
4601 
/* Document type found in an EBML DOCTYPE element. */
typedef enum
{
  EBML_DOCTYPE_UNKNOWN = 0,     /* no "matroska"/"webm" doctype seen */
  EBML_DOCTYPE_MATROSKA = 1,    /* doctype starts with "matroska" */
  EBML_DOCTYPE_WEBM = 2         /* doctype starts with "webm" */
} GstEbmlDocType;
4608 
4609 typedef struct
4610 {
4611   GstEbmlDocType doctype;
4612   guint audio;
4613   guint video;
4614   guint other;
4615   guint video_stereo;
4616   guint chunks;
4617   guint tracks_ok;              /* if we've seen and fully parsed the TRACKS element */
4618 } GstMatroskaInfo;
4619 
4620 static inline guint
ebml_read_chunk_header(GstTypeFind * tf,DataScanCtx * c,guint max_size,guint32 * id,guint64 * size)4621 ebml_read_chunk_header (GstTypeFind * tf, DataScanCtx * c, guint max_size,
4622     guint32 * id, guint64 * size)
4623 {
4624   guint64 mask;
4625   guint msbit_set, i, len, id_len;
4626 
4627   if (c->size < 12 || max_size < 1)
4628     return 0;
4629 
4630   /* element ID */
4631   *id = c->data[0];
4632   if ((c->data[0] & 0x80) == 0x80) {
4633     id_len = 1;
4634   } else if ((c->data[0] & 0xC0) == 0x40) {
4635     id_len = 2;
4636   } else if ((c->data[0] & 0xE0) == 0x20) {
4637     id_len = 3;
4638   } else if ((c->data[0] & 0xF0) == 0x10) {
4639     id_len = 4;
4640   } else {
4641     return 0;
4642   }
4643 
4644   if (max_size < id_len)
4645     return 0;
4646 
4647   for (i = 1; i < id_len; ++i) {
4648     *id = (*id << 8) | c->data[i];
4649   }
4650 
4651   data_scan_ctx_advance (tf, c, id_len);
4652   max_size -= id_len;
4653 
4654   /* size */
4655   if (max_size < 1 || c->data[0] == 0)
4656     return 0;
4657 
4658   msbit_set = g_bit_nth_msf (c->data[0], 8);
4659   mask = ((1 << msbit_set) - 1);
4660   *size = c->data[0] & mask;
4661   len = 7 - msbit_set;
4662 
4663   if (max_size < 1 + len)
4664     return 0;
4665   for (i = 0; i < len; ++i) {
4666     mask = (mask << 8) | 0xff;
4667     *size = (*size << 8) | c->data[1 + i];
4668   }
4669 
4670   data_scan_ctx_advance (tf, c, 1 + len);
4671 
4672   /* undefined/unknown size? (all bits 1) */
4673   if (*size == mask) {
4674     /* allow unknown size for SEGMENT chunk, bail out otherwise */
4675     if (*id == MATROSKA_SEGMENT)
4676       *size = G_MAXUINT64;
4677     else
4678       return 0;
4679   }
4680 
4681   return id_len + (1 + len);
4682 }
4683 
4684 static gboolean
ebml_parse_chunk(GstTypeFind * tf,DataScanCtx * ctx,guint32 chunk_id,guint chunk_size,GstMatroskaInfo * info,guint depth)4685 ebml_parse_chunk (GstTypeFind * tf, DataScanCtx * ctx, guint32 chunk_id,
4686     guint chunk_size, GstMatroskaInfo * info, guint depth)
4687 {                               /* FIXME: make sure input size is clipped to 32 bit */
4688   static const gchar SPACES[] = "                ";
4689   DataScanCtx c = *ctx;
4690   guint64 element_size = 0;
4691   guint32 id, hdr_len;
4692 
4693   if (depth >= 8)               /* keep SPACES large enough for depth */
4694     return FALSE;
4695 
4696   while (chunk_size > 0) {
4697     if (c.offset > EBML_MAX_LEN || !data_scan_ctx_ensure_data (tf, &c, 64))
4698       return FALSE;
4699 
4700     hdr_len = ebml_read_chunk_header (tf, &c, chunk_size, &id, &element_size);
4701     if (hdr_len == 0)
4702       return FALSE;
4703 
4704     g_assert (hdr_len <= chunk_size);
4705     chunk_size -= hdr_len;
4706 
4707     if (element_size > chunk_size)
4708       return FALSE;
4709 
4710     GST_DEBUG ("%s %08x, size %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT,
4711         SPACES + sizeof (SPACES) - 1 - (2 * depth), id, element_size,
4712         hdr_len + element_size);
4713 
4714     if (element_size >= G_MAXUINT32) {
4715       GST_DEBUG ("Chunk too big for typefinding");
4716       return FALSE;
4717     }
4718 
4719     if (!data_scan_ctx_ensure_data (tf, &c, element_size)) {
4720       GST_DEBUG ("not enough data");
4721       return FALSE;
4722     }
4723 
4724     switch (id) {
4725       case EBML_DOCTYPE:
4726         if (element_size >= 8 && memcmp (c.data, "matroska", 8) == 0)
4727           info->doctype = EBML_DOCTYPE_MATROSKA;
4728         else if (element_size >= 4 && memcmp (c.data, "webm", 4) == 0)
4729           info->doctype = EBML_DOCTYPE_WEBM;
4730         break;
4731       case MATROSKA_SEGMENT:
4732         GST_LOG ("parsing segment");
4733         ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1);
4734         GST_LOG ("parsed segment, done");
4735         return FALSE;
4736       case MATROSKA_TRACKS:
4737         GST_LOG ("parsing tracks");
4738         info->tracks_ok =
4739             ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1);
4740         GST_LOG ("parsed tracks: %s, done (after %" G_GUINT64_FORMAT " bytes)",
4741             info->tracks_ok ? "ok" : "FAIL", c.offset + element_size);
4742         return FALSE;
4743       case MATROSKA_TRACK_ENTRY:
4744         GST_LOG ("parsing track entry");
4745         if (!ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1))
4746           return FALSE;
4747         break;
4748       case MATROSKA_TRACK_TYPE:{
4749         guint type = 0, i;
4750 
4751         /* is supposed to always be 1-byte, but not everyone's following that */
4752         for (i = 0; i < element_size; ++i)
4753           type = (type << 8) | c.data[i];
4754 
4755         GST_DEBUG ("%s   track type %u",
4756             SPACES + sizeof (SPACES) - 1 - (2 * depth), type);
4757 
4758         if (type == 1)
4759           ++info->video;
4760         else if (c.data[0] == 2)
4761           ++info->audio;
4762         else
4763           ++info->other;
4764         break;
4765       }
4766       case MATROSKA_STEREO_MODE:
4767         ++info->video_stereo;
4768         break;
4769       case MATROSKA_CLUSTER:
4770         GST_WARNING ("cluster, bailing out (should've found tracks by now)");
4771         return FALSE;
4772       default:
4773         break;
4774     }
4775     data_scan_ctx_advance (tf, &c, element_size);
4776     chunk_size -= element_size;
4777     ++info->chunks;
4778   }
4779 
4780   return TRUE;
4781 }
4782 
4783 static GstStaticCaps matroska_caps = GST_STATIC_CAPS ("video/x-matroska");
4784 
4785 #define MATROSKA_CAPS (gst_static_caps_get(&matroska_caps))
4786 static void
matroska_type_find(GstTypeFind * tf,gpointer unused)4787 matroska_type_find (GstTypeFind * tf, gpointer unused)
4788 {
4789   GstTypeFindProbability prob;
4790   GstMatroskaInfo info = { 0, };
4791   const gchar *type_name;
4792   DataScanCtx c = { 0, NULL, 0 };
4793   gboolean is_audio;
4794   guint64 size;
4795   guint32 id, hdr_len;
4796 
4797   if (!data_scan_ctx_ensure_data (tf, &c, 64))
4798     return;
4799 
4800   if (GST_READ_UINT32_BE (c.data) != EBML_HEADER)
4801     return;
4802 
4803   while (c.offset < EBML_MAX_LEN && data_scan_ctx_ensure_data (tf, &c, 64)) {
4804     hdr_len = ebml_read_chunk_header (tf, &c, c.size, &id, &size);
4805     if (hdr_len == 0)
4806       return;
4807 
4808     GST_INFO ("=== top-level chunk %08x, size %" G_GUINT64_FORMAT
4809         " / %" G_GUINT64_FORMAT, id, size, size + hdr_len);
4810 
4811     if (!ebml_parse_chunk (tf, &c, id, size, &info, 0))
4812       break;
4813     data_scan_ctx_advance (tf, &c, size);
4814     GST_INFO ("=== done with chunk %08x", id);
4815     if (id == MATROSKA_SEGMENT)
4816       break;
4817   }
4818 
4819   GST_INFO ("audio=%u video=%u other=%u chunks=%u doctype=%d all_tracks=%d",
4820       info.audio, info.video, info.other, info.chunks, info.doctype,
4821       info.tracks_ok);
4822 
4823   /* perhaps we should bail out if tracks_ok is FALSE and wait for more data?
4824    * (we would need new API to signal this properly and prevent other
4825    * typefinders from taking over the decision then) */
4826   is_audio = (info.audio > 0 && info.video == 0 && info.other == 0);
4827 
4828   if (info.doctype == EBML_DOCTYPE_WEBM) {
4829     type_name = (is_audio) ? "audio/webm" : "video/webm";
4830   } else if (info.video > 0 && info.video_stereo) {
4831     type_name = "video/x-matroska-3d";
4832   } else {
4833     type_name = (is_audio) ? "audio/x-matroska" : "video/x-matroska";
4834   }
4835 
4836   if (info.doctype == EBML_DOCTYPE_UNKNOWN)
4837     prob = GST_TYPE_FIND_LIKELY;
4838   else
4839     prob = GST_TYPE_FIND_MAXIMUM;
4840 
4841   gst_type_find_suggest_empty_simple (tf, prob, type_name);
4842 }
4843 
4844 /*** application/mxf ***/
4845 static GstStaticCaps mxf_caps = GST_STATIC_CAPS ("application/mxf");
4846 
4847 #define MXF_MAX_PROBE_LENGTH (1024 * 64)
4848 #define MXF_CAPS (gst_static_caps_get(&mxf_caps))
4849 
4850 /*
4851  * MXF files start with a header partition pack key of 16 bytes which is defined
4852  * at SMPTE-377M 6.1. Before this there can be up to 64K of run-in which _must_
4853  * not contain the partition pack key.
4854  */
4855 static void
mxf_type_find(GstTypeFind * tf,gpointer unused)4856 mxf_type_find (GstTypeFind * tf, gpointer unused)
4857 {
4858   static const guint8 partition_pack_key[] =
4859       { 0x06, 0x0e, 0x2b, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0d, 0x01, 0x02, 0x01,
4860     0x01
4861   };
4862   DataScanCtx c = { 0, NULL, 0 };
4863 
4864   while (c.offset <= MXF_MAX_PROBE_LENGTH) {
4865     guint i;
4866     if (G_UNLIKELY (!data_scan_ctx_ensure_data (tf, &c, 1024)))
4867       break;
4868 
4869     /* look over in chunks of 1kbytes to avoid too much overhead */
4870 
4871     for (i = 0; i < 1024 - 16; i++) {
4872       /* Check first byte before calling more expensive memcmp function */
4873       if (G_UNLIKELY (c.data[i] == 0x06
4874               && memcmp (c.data + i, partition_pack_key, 13) == 0)) {
4875         /* Header partition pack? */
4876         if (c.data[i + 13] != 0x02)
4877           goto advance;
4878 
4879         /* Partition status */
4880         if (c.data[i + 14] >= 0x05)
4881           goto advance;
4882 
4883         /* Reserved, must be 0x00 */
4884         if (c.data[i + 15] != 0x00)
4885           goto advance;
4886 
4887         gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MXF_CAPS);
4888         return;
4889       }
4890     }
4891 
4892   advance:
4893     data_scan_ctx_advance (tf, &c, 1024 - 16);
4894   }
4895 }
4896 
4897 /*** video/x-dv ***/
4898 
4899 static GstStaticCaps dv_caps = GST_STATIC_CAPS ("video/x-dv, "
4900     "systemstream = (boolean) true");
4901 #define DV_CAPS (gst_static_caps_get(&dv_caps))
4902 static void
dv_type_find(GstTypeFind * tf,gpointer private)4903 dv_type_find (GstTypeFind * tf, gpointer private)
4904 {
4905   const guint8 *data;
4906 
4907   data = gst_type_find_peek (tf, 0, 5);
4908 
4909   /* check for DIF  and DV flag */
4910   if (data && (data[0] == 0x1f) && (data[1] == 0x07) && (data[2] == 0x00)) {
4911     const gchar *format;
4912 
4913     if (data[3] & 0x80) {
4914       format = "PAL";
4915     } else {
4916       format = "NTSC";
4917     }
4918 
4919     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, "video/x-dv",
4920         "systemstream", G_TYPE_BOOLEAN, TRUE,
4921         "format", G_TYPE_STRING, format, NULL);
4922   }
4923 }
4924 
4925 
4926 /*** Ogg variants ***/
4927 static GstStaticCaps ogg_caps =
4928     GST_STATIC_CAPS ("application/ogg;video/ogg;audio/ogg;application/kate");
4929 
4930 #define OGG_CAPS (gst_static_caps_get(&ogg_caps))
4931 
/* Categories of Ogg logical streams; the values double as indices into the
 * per-category header counters, with OGG_NUM giving the array size. */
typedef enum
{
  OGG_AUDIO = 0,
  OGG_VIDEO = 1,
  OGG_KATE = 2,
  OGG_OTHER = 3,
  OGG_SKELETON = 4,
  OGG_ANNODEX = 5,
  OGG_NUM = 6                   /* number of categories, not a category */
} GstOggStreamType;
4942 
4943 static void
ogganx_type_find(GstTypeFind * tf,gpointer private)4944 ogganx_type_find (GstTypeFind * tf, gpointer private)
4945 {
4946   const gchar *media_type;
4947   DataScanCtx c = { 0, NULL, 0 };
4948   guint ogg_syncs = 0;
4949   guint hdr_count[OGG_NUM] = { 0, };
4950   static const struct
4951   {
4952     const gchar marker[10];
4953     guint8 marker_size;
4954     GstOggStreamType stream_type;
4955   } markers[] = {
4956     {
4957     "\001vorbis", 7, OGG_AUDIO}, {
4958     "\200theora", 7, OGG_VIDEO}, {
4959     "fLaC", 4, OGG_AUDIO}, {
4960     "\177FLAC", 5, OGG_AUDIO}, {
4961     "Speex", 5, OGG_AUDIO}, {
4962     "CMML\0\0\0\0", 8, OGG_OTHER}, {
4963     "PCM     ", 8, OGG_AUDIO}, {
4964     "Annodex", 7, OGG_ANNODEX}, {
4965     "fishead", 7, OGG_SKELETON}, {
4966     "AnxData", 7, OGG_ANNODEX}, {
4967     "CELT    ", 8, OGG_AUDIO}, {
4968     "\200kate\0\0\0", 8, OGG_KATE}, {
4969     "BBCD\0", 5, OGG_VIDEO}, {
4970     "OVP80\1\1", 7, OGG_VIDEO}, {
4971     "OpusHead", 8, OGG_AUDIO}, {
4972     "\001audio\0\0\0", 9, OGG_AUDIO}, {
4973     "\001video\0\0\0", 9, OGG_VIDEO}, {
4974     "\001text\0\0\0", 9, OGG_OTHER}
4975   };
4976 
4977   while (c.offset < 4096 && data_scan_ctx_ensure_data (tf, &c, 64)) {
4978     guint size, i;
4979 
4980     if (memcmp (c.data, "OggS", 5) != 0)
4981       break;
4982 
4983     ++ogg_syncs;
4984 
4985     /* check if BOS */
4986     if (c.data[5] != 0x02)
4987       break;
4988 
4989     /* headers should only have one segment */
4990     if (c.data[26] != 1)
4991       break;
4992 
4993     size = c.data[27];
4994     if (size < 8)
4995       break;
4996 
4997     data_scan_ctx_advance (tf, &c, 28);
4998 
4999     if (!data_scan_ctx_ensure_data (tf, &c, MAX (size, 8)))
5000       break;
5001 
5002     for (i = 0; i < G_N_ELEMENTS (markers); ++i) {
5003       if (memcmp (c.data, markers[i].marker, markers[i].marker_size) == 0) {
5004         ++hdr_count[markers[i].stream_type];
5005         break;
5006       }
5007     }
5008 
5009     if (i == G_N_ELEMENTS (markers)) {
5010       GST_MEMDUMP ("unknown Ogg stream marker", c.data, size);
5011       ++hdr_count[OGG_OTHER];
5012     }
5013 
5014     data_scan_ctx_advance (tf, &c, size);
5015   }
5016 
5017   if (ogg_syncs == 0)
5018     return;
5019 
5020   /* We don't bother with annodex types. FIXME: what about XSPF? */
5021   if (hdr_count[OGG_VIDEO] > 0) {
5022     media_type = "video/ogg";
5023   } else if (hdr_count[OGG_AUDIO] > 0) {
5024     media_type = "audio/ogg";
5025   } else if (hdr_count[OGG_KATE] > 0 && hdr_count[OGG_OTHER] == 0) {
5026     media_type = "application/kate";
5027   } else {
5028     media_type = "application/ogg";
5029   }
5030 
5031   GST_INFO ("found %s (audio:%u, video:%u, annodex:%u, skeleton:%u, other:%u)",
5032       media_type, hdr_count[OGG_AUDIO], hdr_count[OGG_VIDEO],
5033       hdr_count[OGG_ANNODEX], hdr_count[OGG_SKELETON], hdr_count[OGG_OTHER]);
5034 
5035   gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_MAXIMUM, media_type);
5036 }
5037 
5038 /*** audio/x-vorbis ***/
5039 static GstStaticCaps vorbis_caps = GST_STATIC_CAPS ("audio/x-vorbis");
5040 
5041 #define VORBIS_CAPS (gst_static_caps_get(&vorbis_caps))
5042 static void
vorbis_type_find(GstTypeFind * tf,gpointer private)5043 vorbis_type_find (GstTypeFind * tf, gpointer private)
5044 {
5045   const guint8 *data = gst_type_find_peek (tf, 0, 30);
5046 
5047   if (data) {
5048     guint blocksize_0;
5049     guint blocksize_1;
5050 
5051     /* 1 byte packet type (identification=0x01)
5052        6 byte string "vorbis"
5053        4 byte vorbis version */
5054     if (memcmp (data, "\001vorbis\000\000\000\000", 11) != 0)
5055       return;
5056     data += 11;
5057     /* 1 byte channels must be != 0 */
5058     if (data[0] == 0)
5059       return;
5060     data++;
5061     /* 4 byte samplerate must be != 0 */
5062     if (GST_READ_UINT32_LE (data) == 0)
5063       return;
5064     data += 16;
5065     /* blocksize checks */
5066     blocksize_0 = data[0] & 0x0F;
5067     blocksize_1 = (data[0] & 0xF0) >> 4;
5068     if (blocksize_0 > blocksize_1)
5069       return;
5070     if (blocksize_0 < 6 || blocksize_0 > 13)
5071       return;
5072     if (blocksize_1 < 6 || blocksize_1 > 13)
5073       return;
5074     data++;
5075     /* framing bit */
5076     if ((data[0] & 0x01) != 1)
5077       return;
5078     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, VORBIS_CAPS);
5079   }
5080 }
5081 
5082 /*** video/x-theora ***/
5083 
5084 static GstStaticCaps theora_caps = GST_STATIC_CAPS ("video/x-theora");
5085 
5086 #define THEORA_CAPS (gst_static_caps_get(&theora_caps))
5087 static void
theora_type_find(GstTypeFind * tf,gpointer private)5088 theora_type_find (GstTypeFind * tf, gpointer private)
5089 {
5090   const guint8 *data = gst_type_find_peek (tf, 0, 7);   //42);
5091 
5092   if (data) {
5093     if (data[0] != 0x80)
5094       return;
5095     if (memcmp (&data[1], "theora", 6) != 0)
5096       return;
5097     /* FIXME: make this more reliable when specs are out */
5098 
5099     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, THEORA_CAPS);
5100   }
5101 }
5102 
5103 /*** kate ***/
5104 static void
kate_type_find(GstTypeFind * tf,gpointer private)5105 kate_type_find (GstTypeFind * tf, gpointer private)
5106 {
5107   const guint8 *data = gst_type_find_peek (tf, 0, 64);
5108   gchar category[16] = { 0, };
5109 
5110   if (G_UNLIKELY (data == NULL))
5111     return;
5112 
5113   /* see: http://wiki.xiph.org/index.php/OggKate#Format_specification */
5114   if (G_LIKELY (memcmp (data, "\200kate\0\0\0", 8) != 0))
5115     return;
5116 
5117   /* make sure we always have a NUL-terminated string */
5118   memcpy (category, data + 48, 15);
5119   GST_LOG ("kate category: %s", category);
5120   /* canonical categories for subtitles: subtitles, spu-subtitles, SUB, K-SPU */
5121   if (strcmp (category, "subtitles") == 0 || strcmp (category, "SUB") == 0 ||
5122       strcmp (category, "spu-subtitles") == 0 ||
5123       strcmp (category, "K-SPU") == 0) {
5124     gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_MAXIMUM,
5125         "subtitle/x-kate");
5126   } else {
5127     gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_MAXIMUM,
5128         "application/x-kate");
5129   }
5130 }
5131 
/*** WebVTT subtitles ***/
5133 static GstStaticCaps webvtt_caps =
5134 GST_STATIC_CAPS ("application/x-subtitle-vtt, parsed=(boolean)false");
5135 #define WEBVTT_CAPS (gst_static_caps_get(&webvtt_caps))
5136 
5137 static void
webvtt_type_find(GstTypeFind * tf,gpointer private)5138 webvtt_type_find (GstTypeFind * tf, gpointer private)
5139 {
5140   const guint8 *data;
5141   static const guint8 webvtt_with_bom[] = {
5142     0xef, 0xbb, 0xbf, 'W', 'E', 'B', 'V', 'T', 'T'
5143   };
5144 
5145   data = gst_type_find_peek (tf, 0, 7);
5146 
5147   if (data == NULL)
5148     return;
5149 
5150   if (memcmp (data, "WEBVTT", 6) == 0) {
5151     data += 6;
5152   } else {
5153     data = gst_type_find_peek (tf, 0, 10);
5154 
5155     if (!data)
5156       return;
5157 
5158     /* there might be a UTF-8 BOM at the beginning */
5159     if (memcmp (data, webvtt_with_bom, sizeof (webvtt_with_bom)) != 0)
5160       return;
5161 
5162     data += 9;
5163   }
5164 
5165   /* After the WEBVTT magic must be one of these chars:
5166    *   0x20 (space), 0x9 (tab), 0xa (LF) or 0xd (CR) */
5167   if (data[0] != 0x20 && data[0] != 0x9 && data[0] != 0xa && data[0] != 0xd) {
5168     return;
5169   }
5170 
5171   gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, WEBVTT_CAPS);
5172 }
5173 
5174 /*** application/x-ogm-video or audio***/
5175 
5176 static GstStaticCaps ogmvideo_caps =
5177 GST_STATIC_CAPS ("application/x-ogm-video");
5178 #define OGMVIDEO_CAPS (gst_static_caps_get(&ogmvideo_caps))
5179 static void
ogmvideo_type_find(GstTypeFind * tf,gpointer private)5180 ogmvideo_type_find (GstTypeFind * tf, gpointer private)
5181 {
5182   const guint8 *data = gst_type_find_peek (tf, 0, 9);
5183 
5184   if (data) {
5185     if (memcmp (data, "\001video\000\000\000", 9) != 0)
5186       return;
5187     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMVIDEO_CAPS);
5188   }
5189 }
5190 
5191 static GstStaticCaps ogmaudio_caps =
5192 GST_STATIC_CAPS ("application/x-ogm-audio");
5193 #define OGMAUDIO_CAPS (gst_static_caps_get(&ogmaudio_caps))
5194 static void
ogmaudio_type_find(GstTypeFind * tf,gpointer private)5195 ogmaudio_type_find (GstTypeFind * tf, gpointer private)
5196 {
5197   const guint8 *data = gst_type_find_peek (tf, 0, 9);
5198 
5199   if (data) {
5200     if (memcmp (data, "\001audio\000\000\000", 9) != 0)
5201       return;
5202     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMAUDIO_CAPS);
5203   }
5204 }
5205 
5206 static GstStaticCaps ogmtext_caps = GST_STATIC_CAPS ("application/x-ogm-text");
5207 
5208 #define OGMTEXT_CAPS (gst_static_caps_get(&ogmtext_caps))
5209 static void
ogmtext_type_find(GstTypeFind * tf,gpointer private)5210 ogmtext_type_find (GstTypeFind * tf, gpointer private)
5211 {
5212   const guint8 *data = gst_type_find_peek (tf, 0, 9);
5213 
5214   if (data) {
5215     if (memcmp (data, "\001text\000\000\000\000", 9) != 0)
5216       return;
5217     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGMTEXT_CAPS);
5218   }
5219 }
5220 
5221 /*** audio/x-speex ***/
5222 
5223 static GstStaticCaps speex_caps = GST_STATIC_CAPS ("audio/x-speex");
5224 
5225 #define SPEEX_CAPS (gst_static_caps_get(&speex_caps))
5226 static void
speex_type_find(GstTypeFind * tf,gpointer private)5227 speex_type_find (GstTypeFind * tf, gpointer private)
5228 {
5229   const guint8 *data = gst_type_find_peek (tf, 0, 80);
5230 
5231   if (data) {
5232     /* 8 byte string "Speex   "
5233        24 byte speex version string + int */
5234     if (memcmp (data, "Speex   ", 8) != 0)
5235       return;
5236     data += 32;
5237 
5238     /* 4 byte header size >= 80 */
5239     if (GST_READ_UINT32_LE (data) < 80)
5240       return;
5241     data += 4;
5242 
5243     /* 4 byte sample rate <= 48000 */
5244     if (GST_READ_UINT32_LE (data) > 48000)
5245       return;
5246     data += 4;
5247 
5248     /* currently there are only 3 speex modes. */
5249     if (GST_READ_UINT32_LE (data) > 3)
5250       return;
5251     data += 12;
5252 
5253     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, SPEEX_CAPS);
5254   }
5255 }
5256 
5257 /*** audio/x-celt ***/
5258 
5259 static GstStaticCaps celt_caps = GST_STATIC_CAPS ("audio/x-celt");
5260 
5261 #define CELT_CAPS (gst_static_caps_get(&celt_caps))
5262 static void
celt_type_find(GstTypeFind * tf,gpointer private)5263 celt_type_find (GstTypeFind * tf, gpointer private)
5264 {
5265   const guint8 *data = gst_type_find_peek (tf, 0, 8);
5266 
5267   if (data) {
5268     /* 8 byte string "CELT   " */
5269     if (memcmp (data, "CELT    ", 8) != 0)
5270       return;
5271 
5272     /* TODO: Check other values of the CELT header */
5273     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, CELT_CAPS);
5274   }
5275 }
5276 
5277 /*** application/x-ogg-skeleton ***/
5278 static GstStaticCaps ogg_skeleton_caps =
5279 GST_STATIC_CAPS ("application/x-ogg-skeleton, parsed=(boolean)FALSE");
5280 #define OGG_SKELETON_CAPS (gst_static_caps_get(&ogg_skeleton_caps))
5281 static void
oggskel_type_find(GstTypeFind * tf,gpointer private)5282 oggskel_type_find (GstTypeFind * tf, gpointer private)
5283 {
5284   const guint8 *data = gst_type_find_peek (tf, 0, 12);
5285 
5286   if (data) {
5287     /* 8 byte string "fishead\0" for the ogg skeleton stream */
5288     if (memcmp (data, "fishead\0", 8) != 0)
5289       return;
5290     data += 8;
5291 
5292     /* Require that the header contains version 3.0 */
5293     if (GST_READ_UINT16_LE (data) != 3)
5294       return;
5295     data += 2;
5296     if (GST_READ_UINT16_LE (data) != 0)
5297       return;
5298 
5299     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, OGG_SKELETON_CAPS);
5300   }
5301 }
5302 
5303 static GstStaticCaps cmml_caps = GST_STATIC_CAPS ("text/x-cmml");
5304 
5305 #define CMML_CAPS (gst_static_caps_get(&cmml_caps))
5306 static void
cmml_type_find(GstTypeFind * tf,gpointer private)5307 cmml_type_find (GstTypeFind * tf, gpointer private)
5308 {
5309   /* Header is 12 bytes minimum (though we don't check the minor version */
5310   const guint8 *data = gst_type_find_peek (tf, 0, 12);
5311 
5312   if (data) {
5313 
5314     /* 8 byte string "CMML\0\0\0\0" for the magic number */
5315     if (memcmp (data, "CMML\0\0\0\0", 8) != 0)
5316       return;
5317     data += 8;
5318 
5319     /* Require that the header contains at least version 2.0 */
5320     if (GST_READ_UINT16_LE (data) < 2)
5321       return;
5322 
5323     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, CMML_CAPS);
5324   }
5325 }
5326 
5327 /*** application/x-tar ***/
5328 
5329 static GstStaticCaps tar_caps = GST_STATIC_CAPS ("application/x-tar");
5330 
5331 #define TAR_CAPS (gst_static_caps_get(&tar_caps))
5332 #define OLDGNU_MAGIC "ustar  "  /* 7 chars and a NUL */
5333 #define NEWGNU_MAGIC "ustar"    /* 5 chars and a NUL */
5334 static void
tar_type_find(GstTypeFind * tf,gpointer unused)5335 tar_type_find (GstTypeFind * tf, gpointer unused)
5336 {
5337   const guint8 *data = gst_type_find_peek (tf, 257, 8);
5338 
5339   /* of course we are not certain, but we don't want other typefind funcs
5340    * to detect formats of files within the tar archive, e.g. mp3s */
5341   if (data) {
5342     if (memcmp (data, OLDGNU_MAGIC, 8) == 0) {  /* sic */
5343       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, TAR_CAPS);
5344     } else if (memcmp (data, NEWGNU_MAGIC, 6) == 0 &&   /* sic */
5345         g_ascii_isdigit (data[6]) && g_ascii_isdigit (data[7])) {
5346       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, TAR_CAPS);
5347     }
5348   }
5349 }
5350 
5351 /*** application/x-ar ***/
5352 
5353 static GstStaticCaps ar_caps = GST_STATIC_CAPS ("application/x-ar");
5354 
5355 #define AR_CAPS (gst_static_caps_get(&ar_caps))
5356 static void
ar_type_find(GstTypeFind * tf,gpointer unused)5357 ar_type_find (GstTypeFind * tf, gpointer unused)
5358 {
5359   const guint8 *data = gst_type_find_peek (tf, 0, 24);
5360 
5361   if (data && memcmp (data, "!<arch>", 7) == 0) {
5362     gint i;
5363 
5364     for (i = 7; i < 24; ++i) {
5365       if (!g_ascii_isprint (data[i]) && data[i] != '\n') {
5366         gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, AR_CAPS);
5367       }
5368     }
5369 
5370     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, AR_CAPS);
5371   }
5372 }
5373 
5374 /*** audio/x-au ***/
5375 
5376 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5377  * as it is only possible to register one typefind factory per 'name'
5378  * (which is in this case the caps), and the first one would be replaced by
5379  * the second one. */
5380 static GstStaticCaps au_caps = GST_STATIC_CAPS ("audio/x-au");
5381 
5382 #define AU_CAPS (gst_static_caps_get(&au_caps))
5383 static void
au_type_find(GstTypeFind * tf,gpointer unused)5384 au_type_find (GstTypeFind * tf, gpointer unused)
5385 {
5386   const guint8 *data = gst_type_find_peek (tf, 0, 4);
5387 
5388   if (data) {
5389     if (memcmp (data, ".snd", 4) == 0 || memcmp (data, "dns.", 4) == 0) {
5390       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, AU_CAPS);
5391     }
5392   }
5393 }
5394 
5395 
5396 /*** video/x-nuv ***/
5397 
5398 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5399  * as it is only possible to register one typefind factory per 'name'
5400  * (which is in this case the caps), and the first one would be replaced by
5401  * the second one. */
5402 static GstStaticCaps nuv_caps = GST_STATIC_CAPS ("video/x-nuv");
5403 
5404 #define NUV_CAPS (gst_static_caps_get(&nuv_caps))
5405 static void
nuv_type_find(GstTypeFind * tf,gpointer unused)5406 nuv_type_find (GstTypeFind * tf, gpointer unused)
5407 {
5408   const guint8 *data = gst_type_find_peek (tf, 0, 11);
5409 
5410   if (data) {
5411     if (memcmp (data, "MythTVVideo", 11) == 0
5412         || memcmp (data, "NuppelVideo", 11) == 0) {
5413       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, NUV_CAPS);
5414     }
5415   }
5416 }
5417 
5418 /*** audio/x-paris ***/
5419 /* NOTE: do not replace this function with two TYPE_FIND_REGISTER_START_WITH */
5420 static GstStaticCaps paris_caps = GST_STATIC_CAPS ("audio/x-paris");
5421 
5422 #define PARIS_CAPS (gst_static_caps_get(&paris_caps))
5423 static void
paris_type_find(GstTypeFind * tf,gpointer unused)5424 paris_type_find (GstTypeFind * tf, gpointer unused)
5425 {
5426   const guint8 *data = gst_type_find_peek (tf, 0, 4);
5427 
5428   if (data) {
5429     if (memcmp (data, " paf", 4) == 0 || memcmp (data, "fap ", 4) == 0) {
5430       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, PARIS_CAPS);
5431     }
5432   }
5433 }
5434 
5435 /*** audio/x-sbc ***/
5436 static GstStaticCaps sbc_caps = GST_STATIC_CAPS ("audio/x-sbc");
5437 #define SBC_CAPS (gst_static_caps_get(&sbc_caps))
5438 
5439 static gsize
sbc_check_header(const guint8 * data,gsize len,guint * rate,guint * channels)5440 sbc_check_header (const guint8 * data, gsize len, guint * rate,
5441     guint * channels)
5442 {
5443   static const guint16 sbc_rates[4] = { 16000, 32000, 44100, 48000 };
5444   static const guint8 sbc_blocks[4] = { 4, 8, 12, 16 };
5445   guint n_blocks, ch_mode, n_subbands, bitpool;
5446 
5447   if (data[0] != 0x9C || len < 4)
5448     return 0;
5449 
5450   n_blocks = sbc_blocks[(data[1] >> 4) & 0x03];
5451   ch_mode = (data[1] >> 2) & 0x03;
5452   n_subbands = (data[1] & 0x01) ? 8 : 4;
5453   bitpool = data[2];
5454   if (bitpool < 2)
5455     return 0;
5456 
5457   *rate = sbc_rates[(data[1] >> 6) & 0x03];
5458   *channels = (ch_mode == 0) ? 1 : 2;
5459 
5460   if (ch_mode == 0)
5461     return 4 + (n_subbands * 1) / 2 + (n_blocks * 1 * bitpool) / 8;
5462   else if (ch_mode == 1)
5463     return 4 + (n_subbands * 2) / 2 + (n_blocks * 2 * bitpool) / 8;
5464   else if (ch_mode == 2)
5465     return 4 + (n_subbands * 2) / 2 + (n_blocks * bitpool) / 8;
5466   else if (ch_mode == 3)
5467     return 4 + (n_subbands * 2) / 2 + (n_subbands + n_blocks * bitpool) / 8;
5468 
5469   return 0;
5470 }
5471 
5472 static void
sbc_type_find(GstTypeFind * tf,gpointer unused)5473 sbc_type_find (GstTypeFind * tf, gpointer unused)
5474 {
5475   const guint8 *data;
5476   gsize frame_len;
5477   guint i, rate, channels, offset = 0;
5478 
5479   for (i = 0; i < 10; ++i) {
5480     data = gst_type_find_peek (tf, offset, 8);
5481     if (data == NULL)
5482       return;
5483 
5484     frame_len = sbc_check_header (data, 8, &rate, &channels);
5485     if (frame_len == 0)
5486       return;
5487 
5488     offset += frame_len;
5489   }
5490   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE, "audio/x-sbc",
5491       "rate", G_TYPE_INT, rate, "channels", G_TYPE_INT, channels,
5492       "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
5493 }
5494 
5495 /*** audio/iLBC-sh ***/
5496 /* NOTE: do not replace this function with two TYPE_FIND_REGISTER_START_WITH */
5497 static GstStaticCaps ilbc_caps = GST_STATIC_CAPS ("audio/iLBC-sh");
5498 
5499 #define ILBC_CAPS (gst_static_caps_get(&ilbc_caps))
5500 static void
ilbc_type_find(GstTypeFind * tf,gpointer unused)5501 ilbc_type_find (GstTypeFind * tf, gpointer unused)
5502 {
5503   const guint8 *data = gst_type_find_peek (tf, 0, 8);
5504 
5505   if (data) {
5506     if (memcmp (data, "#!iLBC30", 8) == 0 || memcmp (data, "#!iLBC20", 8) == 0) {
5507       gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, ILBC_CAPS);
5508     }
5509   }
5510 }
5511 
5512 /*** application/x-ms-dos-executable ***/
5513 
5514 static GstStaticCaps msdos_caps =
5515 GST_STATIC_CAPS ("application/x-ms-dos-executable");
5516 #define MSDOS_CAPS (gst_static_caps_get(&msdos_caps))
5517 /* see http://www.madchat.org/vxdevl/papers/winsys/pefile/pefile.htm */
5518 static void
msdos_type_find(GstTypeFind * tf,gpointer unused)5519 msdos_type_find (GstTypeFind * tf, gpointer unused)
5520 {
5521   const guint8 *data = gst_type_find_peek (tf, 0, 64);
5522 
5523   if (data && data[0] == 'M' && data[1] == 'Z' &&
5524       GST_READ_UINT16_LE (data + 8) == 4) {
5525     guint32 pe_offset = GST_READ_UINT32_LE (data + 60);
5526 
5527     data = gst_type_find_peek (tf, pe_offset, 2);
5528     if (data && data[0] == 'P' && data[1] == 'E') {
5529       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, MSDOS_CAPS);
5530     }
5531   }
5532 }
5533 
5534 /*** application/x-mmsh ***/
5535 
5536 static GstStaticCaps mmsh_caps = GST_STATIC_CAPS ("application/x-mmsh");
5537 
5538 #define MMSH_CAPS gst_static_caps_get(&mmsh_caps)
5539 
5540 /* This is to recognise mssh-over-http */
5541 static void
mmsh_type_find(GstTypeFind * tf,gpointer unused)5542 mmsh_type_find (GstTypeFind * tf, gpointer unused)
5543 {
5544   static const guint8 asf_marker[16] = { 0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66,
5545     0xcf, 0x11, 0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c
5546   };
5547 
5548   const guint8 *data;
5549 
5550   data = gst_type_find_peek (tf, 0, 2 + 2 + 4 + 2 + 2 + 16);
5551   if (data && data[0] == 0x24 && data[1] == 0x48 &&
5552       GST_READ_UINT16_LE (data + 2) > 2 + 2 + 4 + 2 + 2 + 16 &&
5553       memcmp (data + 2 + 2 + 4 + 2 + 2, asf_marker, 16) == 0) {
5554     gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, MMSH_CAPS);
5555   }
5556 }
5557 
5558 /*** video/x-dirac ***/
5559 
5560 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5561  * as it is only possible to register one typefind factory per 'name'
5562  * (which is in this case the caps), and the first one would be replaced by
5563  * the second one. */
5564 static GstStaticCaps dirac_caps = GST_STATIC_CAPS ("video/x-dirac");
5565 
5566 #define DIRAC_CAPS (gst_static_caps_get(&dirac_caps))
5567 static void
dirac_type_find(GstTypeFind * tf,gpointer unused)5568 dirac_type_find (GstTypeFind * tf, gpointer unused)
5569 {
5570   const guint8 *data = gst_type_find_peek (tf, 0, 8);
5571 
5572   if (data) {
5573     if (memcmp (data, "BBCD", 4) == 0 || memcmp (data, "KW-DIRAC", 8) == 0) {
5574       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, DIRAC_CAPS);
5575     }
5576   }
5577 }
5578 
5579 /*** audio/x-tap-tap ***/
5580 
5581 /* NOTE: we cannot replace this function with TYPE_FIND_REGISTER_START_WITH,
5582  * as it is only possible to register one typefind factory per 'name'
5583  * (which is in this case the caps), and the first one would be replaced by
5584  * the second one. */
5585 static GstStaticCaps tap_caps = GST_STATIC_CAPS ("audio/x-tap-tap");
5586 
5587 #define TAP_CAPS (gst_static_caps_get(&tap_caps))
5588 static void
tap_type_find(GstTypeFind * tf,gpointer unused)5589 tap_type_find (GstTypeFind * tf, gpointer unused)
5590 {
5591   const guint8 *data = gst_type_find_peek (tf, 0, 16);
5592 
5593   if (data) {
5594     if (memcmp (data, "C64-TAPE-RAW", 12) == 0
5595         || memcmp (data, "C16-TAPE-RAW", 12) == 0) {
5596       gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, TAP_CAPS);
5597     }
5598   }
5599 }
5600 
5601 /*** video/vivo ***/
5602 
5603 static GstStaticCaps vivo_caps = GST_STATIC_CAPS ("video/vivo");
5604 
5605 #define VIVO_CAPS gst_static_caps_get(&vivo_caps)
5606 
5607 static void
vivo_type_find(GstTypeFind * tf,gpointer unused)5608 vivo_type_find (GstTypeFind * tf, gpointer unused)
5609 {
5610   static const guint8 vivo_marker[] = { 'V', 'e', 'r', 's', 'i', 'o', 'n',
5611     ':', 'V', 'i', 'v', 'o', '/'
5612   };
5613   const guint8 *data;
5614   guint hdr_len, pos;
5615 
5616   data = gst_type_find_peek (tf, 0, 1024);
5617   if (data == NULL || data[0] != 0x00)
5618     return;
5619 
5620   if ((data[1] & 0x80)) {
5621     if ((data[2] & 0x80))
5622       return;
5623     hdr_len = ((guint) (data[1] & 0x7f)) << 7;
5624     hdr_len += data[2];
5625     if (hdr_len > 2048)
5626       return;
5627     pos = 3;
5628   } else {
5629     hdr_len = data[1];
5630     pos = 2;
5631   }
5632 
5633   /* 1008 = 1022 - strlen ("Version:Vivo/") - 1 */
5634   while (pos < 1008 && data[pos] == '\r' && data[pos + 1] == '\n')
5635     pos += 2;
5636 
5637   if (memcmp (data + pos, vivo_marker, sizeof (vivo_marker)) == 0) {
5638     gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, VIVO_CAPS);
5639   }
5640 }
5641 
5642 /*** XDG MIME typefinder (to avoid false positives mostly) ***/
5643 
5644 #ifdef USE_GIO
5645 static gboolean
xdgmime_validate_name(const gchar * name)5646 xdgmime_validate_name (const gchar * name)
5647 {
5648   const gchar *s;
5649 
5650   if (G_UNLIKELY (!g_ascii_isalpha (*name))) {
5651     return FALSE;
5652   }
5653 
5654   /* FIXME: test name string more */
5655   s = &name[1];
5656   while (*s && (g_ascii_isalnum (*s) || strchr ("/-_.:+", *s) != NULL))
5657     s++;
5658   if (G_UNLIKELY (*s != '\0')) {
5659     return FALSE;
5660   }
5661 
5662   return TRUE;
5663 }
5664 
5665 static void
xdgmime_typefind(GstTypeFind * find,gpointer user_data)5666 xdgmime_typefind (GstTypeFind * find, gpointer user_data)
5667 {
5668   gchar *mimetype;
5669   gsize length = 16384;
5670   guint64 tf_length;
5671   const guint8 *data;
5672   gchar *tmp;
5673 
5674   if ((tf_length = gst_type_find_get_length (find)) > 0)
5675     length = MIN (length, tf_length);
5676 
5677   if ((data = gst_type_find_peek (find, 0, length)) == NULL)
5678     return;
5679 
5680   tmp = g_content_type_guess (NULL, data, length, NULL);
5681   if (tmp == NULL || g_content_type_is_unknown (tmp)) {
5682     g_free (tmp);
5683     return;
5684   }
5685 
5686   mimetype = g_content_type_get_mime_type (tmp);
5687   g_free (tmp);
5688 
5689   if (mimetype == NULL)
5690     return;
5691 
5692   GST_DEBUG ("Got mimetype '%s'", mimetype);
5693 
5694   /* Ignore audio/video types:
5695    *  - our own typefinders in -base are likely to be better at this
5696    *    (and if they're not, we really want to fix them, that's why we don't
5697    *    report xdg-detected audio/video types at all, not even with a low
5698    *    probability)
5699    *  - we want to detect GStreamer media types and not MIME types
5700    *  - the purpose of this xdg mime finder is mainly to prevent false
5701    *    positives of non-media formats, not to typefind audio/video formats */
5702   if (g_str_has_prefix (mimetype, "audio/") ||
5703       g_str_has_prefix (mimetype, "video/")) {
5704     GST_LOG ("Ignoring audio/video mime type");
5705     g_free (mimetype);
5706     return;
5707   }
5708 
5709   if (!xdgmime_validate_name (mimetype)) {
5710     GST_LOG ("Ignoring mimetype with invalid structure name");
5711     g_free (mimetype);
5712     return;
5713   }
5714 
5715   /* Again, we mainly want the xdg typefinding to prevent false-positives on
5716    * non-media formats, so suggest the type with a probability that trumps
5717    * uncertain results of our typefinders, but not more than that. */
5718   GST_LOG ("Suggesting '%s' with probability POSSIBLE", mimetype);
5719   gst_type_find_suggest_empty_simple (find, GST_TYPE_FIND_POSSIBLE, mimetype);
5720   g_free (mimetype);
5721 }
5722 #endif /* USE_GIO */
5723 
5724 /*** Windows icon typefinder (to avoid false positives mostly) ***/
5725 
5726 static void
windows_icon_typefind(GstTypeFind * find,gpointer user_data)5727 windows_icon_typefind (GstTypeFind * find, gpointer user_data)
5728 {
5729   const guint8 *data;
5730   gint64 datalen;
5731   guint16 type, nimages;
5732   gint32 size, offset;
5733 
5734   datalen = gst_type_find_get_length (find);
5735   if (datalen < 22)
5736     return;
5737   if ((data = gst_type_find_peek (find, 0, 6)) == NULL)
5738     return;
5739 
5740   /* header - simple and not enough to rely on it alone */
5741   if (GST_READ_UINT16_LE (data) != 0)
5742     return;
5743   type = GST_READ_UINT16_LE (data + 2);
5744   if (type != 1 && type != 2)
5745     return;
5746   nimages = GST_READ_UINT16_LE (data + 4);
5747   if (nimages == 0)             /* we can assume we can't have an empty image file ? */
5748     return;
5749 
5750   /* first image */
5751   if (data[6 + 3] != 0)
5752     return;
5753   if (type == 1) {
5754     guint16 planes = GST_READ_UINT16_LE (data + 6 + 4);
5755     if (planes > 1)
5756       return;
5757   }
5758   size = GST_READ_UINT32_LE (data + 6 + 8);
5759   offset = GST_READ_UINT32_LE (data + 6 + 12);
5760   if (offset < 0 || size <= 0 || size >= datalen || offset >= datalen
5761       || size + offset > datalen)
5762     return;
5763 
5764   gst_type_find_suggest_empty_simple (find, GST_TYPE_FIND_NEARLY_CERTAIN,
5765       "image/x-icon");
5766 }
5767 
5768 /*** WAP WBMP typefinder ***/
5769 
/* Suggests image/vnd.wap.wbmp (slightly below POSSIBLE) when the stream
 * starts with two zero bytes followed by a width and a height, and the
 * total stream length equals exactly the header size plus the size of a
 * 1-bit-per-pixel bitmap of those dimensions. Nothing is suggested when the
 * stream length is reported as 0. */
static void
wbmp_typefind (GstTypeFind * find, gpointer user_data)
{
  const guint8 *data;
  gint64 datalen;
  guint w, h, size;

  /* http://en.wikipedia.org/wiki/Wireless_Application_Protocol_Bitmap_Format */
  datalen = gst_type_find_get_length (find);
  if (datalen == 0)
    return;

  data = gst_type_find_peek (find, 0, 5);
  if (data == NULL)
    return;

  /* want 0x00 0x00 at start */
  if (*data++ != 0 || *data++ != 0)
    return;

  /* min header size */
  size = 4;

  /* let's assume max width/height is 65536 */
  /* a set top bit (0x80) in the first width byte means a second byte
   * follows; that second byte must not have its top bit set */
  w = *data++;
  if ((w & 0x80)) {
    w = (w << 8) | *data++;
    if ((w & 0x80))
      return;
    ++size;
    /* width took two bytes, so the height starts at offset 4 and may itself
     * need two bytes: re-peek to have both available */
    data = gst_type_find_peek (find, 4, 2);
    if (data == NULL)
      return;
  }
  /* height uses the same one- or two-byte scheme as the width */
  h = *data++;
  if ((h & 0x80)) {
    h = (h << 8) | *data++;
    if ((h & 0x80))
      return;
    ++size;
  }

  if (w == 0 || h == 0)
    return;

  /* now add bitmap size */
  /* each row occupies the width rounded up to a whole number of bytes */
  size += h * (GST_ROUND_UP_8 (w) / 8);

  if (datalen == size) {
    gst_type_find_suggest_empty_simple (find, GST_TYPE_FIND_POSSIBLE - 10,
        "image/vnd.wap.wbmp");
  }
}
5823 
5824 /*** DEGAS Atari images (also to avoid false positives, see #625129) ***/
5825 static void
degas_type_find(GstTypeFind * tf,gpointer private)5826 degas_type_find (GstTypeFind * tf, gpointer private)
5827 {
5828   /* No magic, but it should have a fixed size and a few invalid values */
5829   /* http://www.fileformat.info/format/atari/spec/6ecf9f6eb5be494284a47feb8a214687/view.htm */
5830   gint64 len;
5831   const guint8 *data;
5832   guint16 resolution;
5833   int n;
5834 
5835   len = gst_type_find_get_length (tf);
5836   if (len < 34)                 /* smallest header of the lot */
5837     return;
5838   data = gst_type_find_peek (tf, 0, 4);
5839   if (G_UNLIKELY (data == NULL))
5840     return;
5841   resolution = GST_READ_UINT16_BE (data);
5842   if (len == 32034) {
5843     /* could be DEGAS */
5844     if (resolution <= 2)
5845       gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5846           "image/x-degas");
5847   } else if (len == 32066) {
5848     /* could be DEGAS Elite */
5849     if (resolution <= 2) {
5850       data = gst_type_find_peek (tf, len - 16, 8);
5851       if (G_UNLIKELY (data == NULL))
5852         return;
5853       for (n = 0; n < 4; n++) {
5854         if (GST_READ_UINT16_BE (data + n * 2) > 2)
5855           return;
5856       }
5857       gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5858           "image/x-degas");
5859     }
5860   } else if (len >= 66 && len < 32066) {
5861     /* could be compressed DEGAS Elite, but it's compressed and so we can't rely on size,
5862        it does have 4 16 bytes values near the end that are 0-2 though. */
5863     if ((resolution & 0x8000) && (resolution & 0x7fff) <= 2) {
5864       data = gst_type_find_peek (tf, len - 16, 8);
5865       if (G_UNLIKELY (data == NULL))
5866         return;
5867       for (n = 0; n < 4; n++) {
5868         if (GST_READ_UINT16_BE (data + n * 2) > 2)
5869           return;
5870       }
5871       gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_POSSIBLE + 5,
5872           "image/x-degas");
5873     }
5874   }
5875 }
5876 
5877 /*** y4m ***/
5878 
5879 static void
y4m_typefind(GstTypeFind * tf,gpointer private)5880 y4m_typefind (GstTypeFind * tf, gpointer private)
5881 {
5882   const guint8 *data;
5883 
5884   data = gst_type_find_peek (tf, 0, 10);
5885   if (data != NULL && memcmp (data, "YUV4MPEG2 ", 10) == 0) {
5886     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY,
5887         "application/x-yuv4mpeg", "y4mversion", G_TYPE_INT, 2, NULL);
5888   }
5889 }
5890 
5891 /*** DVD ISO images (looks like H.264, see #674069) ***/
5892 static void
dvdiso_type_find(GstTypeFind * tf,gpointer private)5893 dvdiso_type_find (GstTypeFind * tf, gpointer private)
5894 {
5895   /* 0x8000 bytes of zeros, then "\001CD001" */
5896   gint64 len;
5897   const guint8 *data;
5898 
5899   len = gst_type_find_get_length (tf);
5900   if (len < 0x8006)
5901     return;
5902   data = gst_type_find_peek (tf, 0, 0x8006);
5903   if (G_UNLIKELY (data == NULL))
5904     return;
5905   for (len = 0; len < 0x8000; len++)
5906     if (data[len])
5907       return;
5908   /* Can the '1' be anything else ? My three samples all have '1'. */
5909   if (memcmp (data + 0x8000, "\001CD001", 6))
5910     return;
5911 
5912   /* May need more inspection, we may be able to demux some of them */
5913   gst_type_find_suggest_empty_simple (tf, GST_TYPE_FIND_LIKELY,
5914       "application/octet-stream");
5915 }
5916 
5917 /* SSA/ASS subtitles
5918  *
5919  * http://en.wikipedia.org/wiki/SubStation_Alpha
5920  * http://matroska.org/technical/specs/subtitles/ssa.html
5921  */
5922 static void
ssa_type_find(GstTypeFind * tf,gpointer private)5923 ssa_type_find (GstTypeFind * tf, gpointer private)
5924 {
5925   const gchar *start, *end, *ver_str, *media_type = NULL;
5926   const guint8 *data;
5927   gchar *str, *script_type, *p = NULL;
5928   gint64 len;
5929 
5930   data = gst_type_find_peek (tf, 0, 32);
5931 
5932   if (data == NULL)
5933     return;
5934 
5935   /* FIXME: detect utf-16/32 BOM and convert before typefinding the rest */
5936 
5937   /* there might be a UTF-8 BOM at the beginning */
5938   if (memcmp (data, "[Script Info]", 13) != 0 &&
5939       memcmp (data + 3, "[Script Info]", 13) != 0) {
5940     return;
5941   }
5942 
5943   /* now check if we have SSA or ASS */
5944   len = gst_type_find_get_length (tf);
5945   if (len > 8192)
5946     len = 8192;
5947 
5948   data = gst_type_find_peek (tf, 0, len);
5949   if (data == NULL)
5950     return;
5951 
5952   /* skip BOM */
5953   start = (gchar *) memchr (data, '[', 5);
5954   g_assert (start);
5955   len -= (start - (gchar *) data);
5956 
5957   /* ignore anything non-UTF8 for now, in future we might at least allow
5958    * other UTF variants that are clearly prefixed with the appropriate BOM */
5959   if (!g_utf8_validate (start, len, &end) && (len - (end - start)) > 6) {
5960     GST_FIXME ("non-UTF8 SSA/ASS file");
5961     return;
5962   }
5963 
5964   /* something at start,  but not a UTF-8 BOM? */
5965   if (data[0] != '[' && (data[0] != 0xEF || data[1] != 0xBB || data[2] != 0xBF))
5966     return;
5967 
5968   /* ignore any partial UTF-8 characters at the end */
5969   len = end - start;
5970 
5971   /* create a NUL-terminated string so it's easier to process it safely */
5972   str = g_strndup (start, len - 1);
5973   script_type = strstr (str, "ScriptType:");
5974   if (script_type != NULL) {
5975     gdouble version;
5976 
5977     ver_str = script_type + 11;
5978     while (*ver_str == ' ' || *ver_str == 'v' || *ver_str == 'V')
5979       ++ver_str;
5980     version = g_ascii_strtod (ver_str, &p);
5981     if (version == 4.0 && p != NULL && *p == '+')
5982       media_type = "application/x-ass";
5983     else if (version >= 1.0 && version <= 4.0)
5984       media_type = "application/x-ssa";
5985   }
5986 
5987   if (media_type == NULL) {
5988     if (strstr (str, "[v4+ Styles]") || strstr (str, "[V4+ Styles]"))
5989       media_type = "application/x-ass";
5990     else if (strstr (str, "[v4 Styles]") || strstr (str, "[V4 Styles]"))
5991       media_type = "application/x-ssa";
5992   }
5993 
5994   if (media_type != NULL) {
5995     gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
5996         media_type, "parsed", G_TYPE_BOOLEAN, FALSE, NULL);
5997   } else {
5998     GST_WARNING ("could not detect SSA/ASS variant");
5999   }
6000 
6001   g_free (str);
6002 }
6003 
6004 /*** application/x-mcc ***/
6005 static GstStaticCaps mcc_caps = GST_STATIC_CAPS ("application/x-mcc");
6006 
6007 #define MCC_CAPS gst_static_caps_get(&mcc_caps)
6008 
6009 static void
mcc_type_find(GstTypeFind * tf,gpointer private)6010 mcc_type_find (GstTypeFind * tf, gpointer private)
6011 {
6012   const guint8 *data;
6013 
6014   data = gst_type_find_peek (tf, 0, 31);
6015 
6016   if (data == NULL)
6017     return;
6018 
6019   /* MCC files always start with this followed by the version */
6020   if (memcmp (data, "File Format=MacCaption_MCC V", 28) != 0 ||
6021       !g_ascii_isdigit (data[28]) || data[29] != '.' ||
6022       !g_ascii_isdigit (data[30])) {
6023     return;
6024   }
6025 
6026   gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM,
6027       "application/x-mcc", "version", G_TYPE_INT, data[28] - '0', NULL);
6028 }
6029 
6030 /*** video/x-pva ***/
6031 
6032 static GstStaticCaps pva_caps = GST_STATIC_CAPS ("video/x-pva");
6033 
6034 #define PVA_CAPS gst_static_caps_get(&pva_caps)
6035 
6036 static void
pva_type_find(GstTypeFind * tf,gpointer private)6037 pva_type_find (GstTypeFind * tf, gpointer private)
6038 {
6039   const guint8 *data;
6040 
6041   data = gst_type_find_peek (tf, 0, 5);
6042 
6043   if (data == NULL)
6044     return;
6045 
6046   if (data[0] == 'A' && data[1] == 'V' && data[2] < 3 && data[4] == 0x55)
6047     gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, PVA_CAPS);
6048 }
6049 
6050 /*** audio/audible ***/
6051 
6052 /* derived from pyaudibletags
6053  * http://code.google.com/p/pyaudibletags/source/browse/trunk/pyaudibletags.py
6054  */
6055 static GstStaticCaps aa_caps = GST_STATIC_CAPS ("audio/x-audible");
6056 
6057 #define AA_CAPS gst_static_caps_get(&aa_caps)
6058 
6059 static void
aa_type_find(GstTypeFind * tf,gpointer private)6060 aa_type_find (GstTypeFind * tf, gpointer private)
6061 {
6062   const guint8 *data;
6063 
6064   data = gst_type_find_peek (tf, 0, 12);
6065   if (data == NULL)
6066     return;
6067 
6068   if (GST_READ_UINT32_BE (data + 4) == 0x57907536) {
6069     guint64 media_len;
6070 
6071     media_len = gst_type_find_get_length (tf);
6072     if (media_len > 0 && GST_READ_UINT32_BE (data) == media_len)
6073       gst_type_find_suggest (tf, GST_TYPE_FIND_NEARLY_CERTAIN, AA_CAPS);
6074     else
6075       gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, AA_CAPS);
6076   }
6077 }
6078 
6079 /*Type find definition by functions */
6080 GST_TYPE_FIND_REGISTER_DEFINE (musepack, "audio/x-musepack", GST_RANK_PRIMARY,
6081     musepack_type_find, "mpc,mpp,mp+", MUSEPACK_CAPS, NULL, NULL);
6082 GST_TYPE_FIND_REGISTER_DEFINE (au, "audio/x-au", GST_RANK_MARGINAL,
6083     au_type_find, "au,snd", AU_CAPS, NULL, NULL);
6084 
6085 GST_TYPE_FIND_REGISTER_DEFINE (mcc, "application/x-mcc", GST_RANK_PRIMARY,
6086     mcc_type_find, "mcc", MCC_CAPS, NULL, NULL);
6087 #if 0
6088 GST_TYPE_FIND_REGISTER_START_WITH_DEFINE (smoke, "video/x-smoke",
6089     GST_RANK_PRIMARY, NULL, "\x80smoke\x00\x01\x00", 6, GST_TYPE_FIND_MAXIMUM);
6090 #endif
6091 GST_TYPE_FIND_REGISTER_DEFINE (mid, "audio/midi", GST_RANK_PRIMARY,
6092     mid_type_find, "mid,midi", MID_CAPS, NULL, NULL);
6093 GST_TYPE_FIND_REGISTER_DEFINE (mxmf, "audio/mobile-xmf", GST_RANK_PRIMARY,
6094     mxmf_type_find, "mxmf", MXMF_CAPS, NULL, NULL);
6095 GST_TYPE_FIND_REGISTER_DEFINE (flx, "video/x-fli", GST_RANK_MARGINAL,
6096     flx_type_find, "flc,fli", FLX_CAPS, NULL, NULL);
6097 GST_TYPE_FIND_REGISTER_DEFINE (id3v2, "application/x-id3v2",
6098     GST_RANK_PRIMARY + 103, id3v2_type_find, "mp3,mp2,mp1,mpga,ogg,flac,tta",
6099     ID3_CAPS, NULL, NULL);
6100 GST_TYPE_FIND_REGISTER_DEFINE (id3v1, "application/x-id3v1",
6101     GST_RANK_PRIMARY + 101, id3v1_type_find, "mp3,mp2,mp1,mpga,ogg,flac,tta",
6102     ID3_CAPS, NULL, NULL);
6103 #ifdef OHOS_OPT_COMPAT
6104 /*
6105  * ohos.opt.compat.0051
6106  * wav audio stream recognition errors
6107  */
6108 #else
6109 GST_TYPE_FIND_REGISTER_DEFINE (apetag, "application/x-apetag",
6110     GST_RANK_PRIMARY + 102, apetag_type_find, "mp3,ape,mpc,wv", APETAG_CAPS,
6111     NULL, NULL);
6112 #endif
6113 GST_TYPE_FIND_REGISTER_DEFINE (tta, "audio/x-ttafile", GST_RANK_PRIMARY,
6114     tta_type_find, "tta", TTA_CAPS, NULL, NULL);
6115 GST_TYPE_FIND_REGISTER_DEFINE (mod, "audio/x-mod", GST_RANK_SECONDARY,
6116     mod_type_find,
6117     "669,amf,ams,dbm,digi,dmf,dsm,gdm,far,imf,it,j2b,mdl,med,mod,mt2,mtm,"
6118     "okt,psm,ptm,sam,s3m,stm,stx,ult,umx,xm", MOD_CAPS, NULL, NULL);
6119 GST_TYPE_FIND_REGISTER_DEFINE (mp3, "audio/mpeg", GST_RANK_PRIMARY,
6120     mp3_type_find, "mp3,mp2,mp1,mpga", MP3_CAPS, NULL, NULL);
6121 GST_TYPE_FIND_REGISTER_DEFINE (ac3, "audio/x-ac3", GST_RANK_PRIMARY,
6122     ac3_type_find, "ac3,eac3", AC3_CAPS, NULL, NULL);
6123 GST_TYPE_FIND_REGISTER_DEFINE (dts, "audio/x-dts", GST_RANK_SECONDARY,
6124     dts_type_find, "dts", DTS_CAPS, NULL, NULL);
6125 GST_TYPE_FIND_REGISTER_DEFINE (gsm, "audio/x-gsm", GST_RANK_PRIMARY, NULL,
6126     "gsm", GSM_CAPS, NULL, NULL);
6127 GST_TYPE_FIND_REGISTER_DEFINE (mpeg_sys, "video/mpeg-sys", GST_RANK_PRIMARY,
6128     mpeg_sys_type_find, "mpe,mpeg,mpg", MPEG_SYS_CAPS, NULL, NULL);
6129 GST_TYPE_FIND_REGISTER_DEFINE (mpeg_ts, "video/mpegts", GST_RANK_PRIMARY,
6130     mpeg_ts_type_find, "ts,mts", MPEGTS_CAPS, NULL, NULL);
6131 GST_TYPE_FIND_REGISTER_DEFINE (ogganx, "application/ogg", GST_RANK_PRIMARY,
6132     ogganx_type_find, "ogg,oga,ogv,ogm,ogx,spx,anx,axa,axv", OGG_CAPS, NULL,
6133     NULL);
6134 GST_TYPE_FIND_REGISTER_DEFINE (mpeg_video_stream, "video/mpeg-elementary",
6135     GST_RANK_MARGINAL, mpeg_video_stream_type_find, "mpv,mpeg,mpg",
6136     MPEG_VIDEO_CAPS, NULL, NULL);
6137 GST_TYPE_FIND_REGISTER_DEFINE (mpeg4_video, "video/mpeg4", GST_RANK_PRIMARY,
6138     mpeg4_video_type_find, "m4v", MPEG_VIDEO_CAPS, NULL, NULL);
6139 #ifdef OHOS_OPT_COMPAT
6140 /*
6141  * ohos.opt.compat.0015
6142  * mp3: mpeg audio stream is incorrectly identified as video:h263 or video:h264 or video:h265 video stream,
6143  * which causes playback failure and lowers the score
6144  */
6145 #else
6146 GST_TYPE_FIND_REGISTER_DEFINE (h263_video, "video/x-h263", GST_RANK_SECONDARY,
6147     h263_video_type_find, "h263,263", H263_VIDEO_CAPS, NULL, NULL);
6148 GST_TYPE_FIND_REGISTER_DEFINE (h264_video, "video/x-h264", GST_RANK_PRIMARY,
6149     h264_video_type_find, "h264,x264,264", H264_VIDEO_CAPS, NULL, NULL);
6150 GST_TYPE_FIND_REGISTER_DEFINE (h265_video, "video/x-h265", GST_RANK_PRIMARY,
6151     h265_video_type_find, "h265,x265,265", H265_VIDEO_CAPS, NULL, NULL);
6152 #endif
6153 GST_TYPE_FIND_REGISTER_DEFINE (nuv, "video/x-nuv", GST_RANK_SECONDARY,
6154     nuv_type_find, "nuv", NUV_CAPS, NULL, NULL);
6155 /* ISO formats */
6156 GST_TYPE_FIND_REGISTER_DEFINE (m4a, "audio/x-m4a", GST_RANK_PRIMARY,
6157     m4a_type_find, "m4a", M4A_CAPS, NULL, NULL);
6158 GST_TYPE_FIND_REGISTER_DEFINE (q3gp, "application/x-3gp", GST_RANK_PRIMARY,
6159     q3gp_type_find, "3gp", Q3GP_CAPS, NULL, NULL);
6160 GST_TYPE_FIND_REGISTER_DEFINE (qt, "video/quicktime", GST_RANK_PRIMARY,
6161     qt_type_find, "mov,mp4", QT_CAPS, NULL, NULL);
6162 GST_TYPE_FIND_REGISTER_DEFINE (qtif, "image/x-quicktime", GST_RANK_SECONDARY,
6163     qtif_type_find, "qif,qtif,qti", QTIF_CAPS, NULL, NULL);
6164 GST_TYPE_FIND_REGISTER_DEFINE (jp2, "image/jp2", GST_RANK_PRIMARY,
6165     jp2_type_find, "jp2", JP2_CAPS, NULL, NULL);
6166 GST_TYPE_FIND_REGISTER_DEFINE (jpc, "image/x-jpc", GST_RANK_PRIMARY,
6167     jpc_type_find, "jpc,j2k", JPC_CAPS, NULL, NULL);
6168 GST_TYPE_FIND_REGISTER_DEFINE (mj2, "video/mj2", GST_RANK_PRIMARY,
6169     jp2_type_find, "mj2", MJ2_CAPS, NULL, NULL);
6170 GST_TYPE_FIND_REGISTER_DEFINE (html, "text/html", GST_RANK_SECONDARY,
6171     html_type_find, "htm,html", HTML_CAPS, NULL, NULL);
6172 GST_TYPE_FIND_REGISTER_DEFINE (swf, "application/x-shockwave-flash",
6173     GST_RANK_SECONDARY, swf_type_find, "swf,swfl", SWF_CAPS, NULL, NULL);
6174 GST_TYPE_FIND_REGISTER_DEFINE (xges, "application/xges",
6175     GST_RANK_PRIMARY, xges_type_find, "xges", XGES_CAPS, NULL, NULL);
6176 GST_TYPE_FIND_REGISTER_DEFINE (xmeml, "application/vnd.apple-xmeml+xml",
6177     GST_RANK_SECONDARY, xmeml_type_find, "xmeml", XMEML_CAPS, NULL, NULL);
6178 GST_TYPE_FIND_REGISTER_DEFINE (fcpxml, "application/vnd.apple-fcp+xml",
6179     GST_RANK_SECONDARY, fcpxml_type_find, "fcpxml", FCPXML_CAPS, NULL, NULL);
6180 GST_TYPE_FIND_REGISTER_DEFINE (otio,
6181     "application/vnd.pixar.opentimelineio+json", GST_RANK_SECONDARY,
6182     otio_type_find, "otio", OTIO_CAPS, NULL, NULL);
6183 GST_TYPE_FIND_REGISTER_DEFINE (dash_mpd, "application/dash+xml",
6184     GST_RANK_PRIMARY, dash_mpd_type_find, "mpd,MPD", DASH_CAPS, NULL, NULL);
6185 GST_TYPE_FIND_REGISTER_DEFINE (mss_manifest, "application/vnd.ms-sstr+xml",
6186     GST_RANK_PRIMARY, mss_manifest_type_find, NULL, MSS_MANIFEST_CAPS, NULL,
6187     NULL);
6188 GST_TYPE_FIND_REGISTER_DEFINE (utf8, "text/plain", GST_RANK_MARGINAL,
6189     utf8_type_find, "txt", UTF8_CAPS, NULL, NULL);
6190 GST_TYPE_FIND_REGISTER_DEFINE (utf16, "text/utf-16", GST_RANK_MARGINAL,
6191     utf16_type_find, "txt", UTF16_CAPS, NULL, NULL);
6192 GST_TYPE_FIND_REGISTER_DEFINE (utf32, "text/utf-32", GST_RANK_MARGINAL,
6193     utf32_type_find, "txt", UTF32_CAPS, NULL, NULL);
6194 GST_TYPE_FIND_REGISTER_DEFINE (uri, "text/uri-list", GST_RANK_MARGINAL,
6195     uri_type_find, "ram", URI_CAPS, NULL, NULL);
6196 GST_TYPE_FIND_REGISTER_DEFINE (itc, "application/itc", GST_RANK_SECONDARY,
6197     itc_type_find, "itc", ITC_CAPS, NULL, NULL);
6198 GST_TYPE_FIND_REGISTER_DEFINE (hls, "application/x-hls", GST_RANK_MARGINAL,
6199     hls_type_find, "m3u8", HLS_CAPS, NULL, NULL);
6200 GST_TYPE_FIND_REGISTER_DEFINE (sdp, "application/sdp", GST_RANK_SECONDARY,
6201     sdp_type_find, "sdp", SDP_CAPS, NULL, NULL);
6202 GST_TYPE_FIND_REGISTER_DEFINE (smil, "application/smil", GST_RANK_SECONDARY,
6203     smil_type_find, "smil", SMIL_CAPS, NULL, NULL);
6204 GST_TYPE_FIND_REGISTER_DEFINE (ttml_xml, "application/ttml+xml",
6205     GST_RANK_SECONDARY, ttml_xml_type_find, "ttml+xml", TTML_XML_CAPS, NULL,
6206     NULL);
6207 GST_TYPE_FIND_REGISTER_DEFINE (xml, "application/xml", GST_RANK_MARGINAL,
6208     xml_type_find, "xml", GENERIC_XML_CAPS, NULL, NULL);
/* Typefinder registrations: aiff, svx, paris, sds, ircam, shn, ape.
 *
 * All seven entries are registered at GST_RANK_SECONDARY, each with its own
 * detection callback, a quoted extension list and a *_CAPS identifier; the
 * last two arguments are NULL in every entry. */
6209 GST_TYPE_FIND_REGISTER_DEFINE (aiff, "audio/x-aiff", GST_RANK_SECONDARY,
6210     aiff_type_find, "aiff,aif,aifc", AIFF_CAPS, NULL, NULL);
6211 GST_TYPE_FIND_REGISTER_DEFINE (svx, "audio/x-svx", GST_RANK_SECONDARY,
6212     svx_type_find, "iff,svx", SVX_CAPS, NULL, NULL);
6213 GST_TYPE_FIND_REGISTER_DEFINE (paris, "audio/x-paris", GST_RANK_SECONDARY,
6214     paris_type_find, "paf", PARIS_CAPS, NULL, NULL);
6215 GST_TYPE_FIND_REGISTER_DEFINE (sds, "audio/x-sds", GST_RANK_SECONDARY,
6216     sds_type_find, "sds", SDS_CAPS, NULL, NULL);
6217 GST_TYPE_FIND_REGISTER_DEFINE (ircam, "audio/x-ircam", GST_RANK_SECONDARY,
6218     ircam_type_find, "sf", IRCAM_CAPS, NULL, NULL);
6219 GST_TYPE_FIND_REGISTER_DEFINE (shn, "audio/x-shorten", GST_RANK_SECONDARY,
6220     shn_type_find, "shn", SHN_CAPS, NULL, NULL);
/* Unlike the audio/... names above, ape is registered under an
 * application/... media-type string. */
6221 GST_TYPE_FIND_REGISTER_DEFINE (ape, "application/x-ape", GST_RANK_SECONDARY,
6222     ape_type_find, "ape", APE_CAPS, NULL, NULL);
/* Typefinder registrations for still-image formats: jpeg, bmp, tiff, exr,
 * pnm. Every entry supplies a quoted extension list and a *_CAPS identifier;
 * the last two arguments are NULL. */

/* jpeg is the only entry here with a rank offset (GST_RANK_PRIMARY + 15),
 * which places it above the plain GST_RANK_PRIMARY image entries below.
 * NOTE(review): the reason for the +15 bump is not visible here - confirm. */
6223 GST_TYPE_FIND_REGISTER_DEFINE (jpeg, "image/jpeg", GST_RANK_PRIMARY + 15,
6224     jpeg_type_find, "jpg,jpe,jpeg", JPEG_CAPS, NULL, NULL);
6225 GST_TYPE_FIND_REGISTER_DEFINE (bmp, "image/bmp", GST_RANK_PRIMARY,
6226     bmp_type_find, "bmp", BMP_CAPS, NULL, NULL);
6227 GST_TYPE_FIND_REGISTER_DEFINE (tiff, "image/tiff", GST_RANK_PRIMARY,
6228     tiff_type_find, "tif,tiff", TIFF_CAPS, NULL, NULL);
6229 GST_TYPE_FIND_REGISTER_DEFINE (exr, "image/x-exr", GST_RANK_PRIMARY,
6230     exr_type_find, "exr", EXR_CAPS, NULL, NULL);
/* A single pnm entry covers the pnm, ppm, pgm and pbm extensions and is
 * registered at the lower GST_RANK_SECONDARY. */
6231 GST_TYPE_FIND_REGISTER_DEFINE (pnm, "image/x-portable-pixmap",
6232     GST_RANK_SECONDARY, pnm_type_find, "pnm,ppm,pgm,pbm", PNM_CAPS, NULL, NULL);
/* Typefinder registrations: matroska, mxf, dv, ilbc, sbc, kate, webvtt, flac.
 *
 * Ranks used: GST_RANK_PRIMARY (matroska, mxf, ilbc, flac),
 * GST_RANK_SECONDARY (dv) and GST_RANK_MARGINAL (sbc, kate, webvtt). */

/* One matroska entry is registered for the mkv, mka, mk3d and webm
 * extensions. */
6233 GST_TYPE_FIND_REGISTER_DEFINE (matroska, "video/x-matroska", GST_RANK_PRIMARY,
6234     matroska_type_find, "mkv,mka,mk3d,webm", MATROSKA_CAPS, NULL, NULL);
6235 GST_TYPE_FIND_REGISTER_DEFINE (mxf, "application/mxf", GST_RANK_PRIMARY,
6236     mxf_type_find, "mxf", MXF_CAPS, NULL, NULL);
6237 GST_TYPE_FIND_REGISTER_DEFINE (dv, "video/x-dv", GST_RANK_SECONDARY,
6238     dv_type_find, "dv,dif", DV_CAPS, NULL, NULL);
6239 GST_TYPE_FIND_REGISTER_DEFINE (ilbc, "audio/iLBC-sh", GST_RANK_PRIMARY,
6240     ilbc_type_find, "ilbc", ILBC_CAPS, NULL, NULL);
6241 GST_TYPE_FIND_REGISTER_DEFINE (sbc, "audio/x-sbc", GST_RANK_MARGINAL,
6242     sbc_type_find, "sbc", SBC_CAPS, NULL, NULL);
/* kate passes NULL for both the extension-list and the caps arguments, so
 * only the name, rank and callback are supplied. */
6243 GST_TYPE_FIND_REGISTER_DEFINE (kate, "subtitle/x-kate", GST_RANK_MARGINAL,
6244     kate_type_find, NULL, NULL, NULL, NULL);
6245 GST_TYPE_FIND_REGISTER_DEFINE (webvtt, "application/x-subtitle-vtt",
6246     GST_RANK_MARGINAL, webvtt_type_find, "vtt", WEBVTT_CAPS, NULL, NULL);
6247 GST_TYPE_FIND_REGISTER_DEFINE (flac, "audio/x-flac", GST_RANK_PRIMARY,
6248     flac_type_find, "flac", FLAC_CAPS, NULL, NULL);
/* Typefinder registrations: vorbis, theora, ogmvideo, ogmaudio, ogmtext,
 * speex, celt, oggskel, cmml.
 *
 * Every entry in this group uses GST_RANK_PRIMARY, passes NULL where other
 * registrations give a file-extension list, and supplies a *_CAPS identifier.
 * NOTE(review): presumably no extensions are listed because these streams are
 * normally carried inside a container rather than stored as standalone files
 * - confirm. */
6249 GST_TYPE_FIND_REGISTER_DEFINE (vorbis, "audio/x-vorbis", GST_RANK_PRIMARY,
6250     vorbis_type_find, NULL, VORBIS_CAPS, NULL, NULL);
6251 GST_TYPE_FIND_REGISTER_DEFINE (theora, "video/x-theora", GST_RANK_PRIMARY,
6252     theora_type_find, NULL, THEORA_CAPS, NULL, NULL);
/* The three ogm entries (video, audio, text) each have their own callback and
 * caps identifier. */
6253 GST_TYPE_FIND_REGISTER_DEFINE (ogmvideo, "application/x-ogm-video",
6254     GST_RANK_PRIMARY, ogmvideo_type_find, NULL, OGMVIDEO_CAPS, NULL, NULL);
6255 GST_TYPE_FIND_REGISTER_DEFINE (ogmaudio, "application/x-ogm-audio",
6256     GST_RANK_PRIMARY, ogmaudio_type_find, NULL, OGMAUDIO_CAPS, NULL, NULL);
6257 GST_TYPE_FIND_REGISTER_DEFINE (ogmtext, "application/x-ogm-text",
6258     GST_RANK_PRIMARY, ogmtext_type_find, NULL, OGMTEXT_CAPS, NULL, NULL);
6259 GST_TYPE_FIND_REGISTER_DEFINE (speex, "audio/x-speex", GST_RANK_PRIMARY,
6260     speex_type_find, NULL, SPEEX_CAPS, NULL, NULL);
6261 GST_TYPE_FIND_REGISTER_DEFINE (celt, "audio/x-celt", GST_RANK_PRIMARY,
6262     celt_type_find, NULL, CELT_CAPS, NULL, NULL);
6263 GST_TYPE_FIND_REGISTER_DEFINE (oggskel, "application/x-ogg-skeleton",
6264     GST_RANK_PRIMARY, oggskel_type_find, NULL, OGG_SKELETON_CAPS, NULL, NULL);
6265 GST_TYPE_FIND_REGISTER_DEFINE (cmml, "text/x-cmml", GST_RANK_PRIMARY,
6266     cmml_type_find, NULL, CMML_CAPS, NULL, NULL);
/* Typefinder registrations: aac, wavpack_wvp, wavpack_wvc. All three are
 * registered at GST_RANK_SECONDARY. */
6267 GST_TYPE_FIND_REGISTER_DEFINE (aac, "audio/aac", GST_RANK_SECONDARY,
6268     aac_type_find, "aac,adts,adif,loas", AAC_CAPS, NULL, NULL);
/* Both wavpack entries share the same callback (wavpack_type_find); they
 * differ in registered name, extension list ("wv,wvp" vs "wvc") and caps
 * (WAVPACK_CAPS vs WAVPACK_CORRECTION_CAPS). */
6269 GST_TYPE_FIND_REGISTER_DEFINE (wavpack_wvp, "audio/x-wavpack",
6270     GST_RANK_SECONDARY, wavpack_type_find, "wv,wvp", WAVPACK_CAPS, NULL, NULL);
6271 GST_TYPE_FIND_REGISTER_DEFINE (wavpack_wvc, "audio/x-wavpack-correction",
6272     GST_RANK_SECONDARY, wavpack_type_find, "wvc", WAVPACK_CORRECTION_CAPS, NULL,
6273     NULL);
/* Typefinder registrations: postscript, svg, tar, ar, msdos.
 *
 * All five are registered at GST_RANK_SECONDARY with a quoted extension list
 * and a *_CAPS identifier; the last two arguments are NULL in every entry. */
6274 GST_TYPE_FIND_REGISTER_DEFINE (postscript, "application/postscript",
6275     GST_RANK_SECONDARY, postscript_type_find, "ps", POSTSCRIPT_CAPS, NULL,
6276     NULL);
6277 GST_TYPE_FIND_REGISTER_DEFINE (svg, "image/svg+xml", GST_RANK_SECONDARY,
6278     svg_type_find, "svg", SVG_CAPS, NULL, NULL);
6279 GST_TYPE_FIND_REGISTER_DEFINE (tar, "application/x-tar", GST_RANK_SECONDARY,
6280     tar_type_find, "tar", TAR_CAPS, NULL, NULL);
6281 GST_TYPE_FIND_REGISTER_DEFINE (ar, "application/x-ar", GST_RANK_SECONDARY,
6282     ar_type_find, "a", AR_CAPS, NULL, NULL);
/* The msdos entry lists seven extensions: dll, exe, ocx, sys, scr, msstyles
 * and cpl. */
6283 GST_TYPE_FIND_REGISTER_DEFINE (msdos, "application/x-ms-dos-executable",
6284     GST_RANK_SECONDARY, msdos_type_find, "dll,exe,ocx,sys,scr,msstyles,cpl",
6285     MSDOS_CAPS, NULL, NULL);
/* Typefinder registrations: dirac, multipart, mmsh, vivo.
 *
 * dirac, multipart and mmsh pass NULL where other registrations give an
 * extension list; vivo is the only entry in this group with one ("viv").
 * dirac uses GST_RANK_PRIMARY, the other three GST_RANK_SECONDARY. */
6286 GST_TYPE_FIND_REGISTER_DEFINE (dirac, "video/x-dirac", GST_RANK_PRIMARY,
6287     dirac_type_find, NULL, DIRAC_CAPS, NULL, NULL);
6288 GST_TYPE_FIND_REGISTER_DEFINE (multipart, "multipart/x-mixed-replace",
6289     GST_RANK_SECONDARY, multipart_type_find, NULL, MULTIPART_CAPS, NULL, NULL);
6290 GST_TYPE_FIND_REGISTER_DEFINE (mmsh, "application/x-mmsh", GST_RANK_SECONDARY,
6291     mmsh_type_find, NULL, MMSH_CAPS, NULL, NULL);
6292 GST_TYPE_FIND_REGISTER_DEFINE (vivo, "video/vivo", GST_RANK_SECONDARY,
6293     vivo_type_find, "viv", VIVO_CAPS, NULL, NULL);
/* Typefinder registrations: wbmp, y4m, windows_icon, xdgmime, degas, dvdiso.
 *
 * Every entry in this group passes NULL for the last four arguments, i.e. it
 * supplies only a name, a rank and a callback. All use GST_RANK_MARGINAL
 * except y4m, which uses GST_RANK_SECONDARY.
 * Note the callback naming: wbmp, y4m, windows_icon and xdgmime use a
 * "_typefind" suffix, while degas and dvdiso use "_type_find". */
6294 GST_TYPE_FIND_REGISTER_DEFINE (wbmp, "image/vnd.wap.wbmp", GST_RANK_MARGINAL,
6295     wbmp_typefind, NULL, NULL, NULL, NULL);
6296 GST_TYPE_FIND_REGISTER_DEFINE (y4m, "application/x-yuv4mpeg",
6297     GST_RANK_SECONDARY, y4m_typefind, NULL, NULL, NULL, NULL);
6298 GST_TYPE_FIND_REGISTER_DEFINE (windows_icon, "image/x-icon", GST_RANK_MARGINAL,
6299     windows_icon_typefind, NULL, NULL, NULL, NULL);
/* The xdgmime typefinder is compiled in only when USE_GIO is defined; the top
 * of this file defines USE_GIO only when HAVE_GIO is set. */
6300 #ifdef USE_GIO
6301 GST_TYPE_FIND_REGISTER_DEFINE (xdgmime, "xdgmime-base", GST_RANK_MARGINAL,
6302     xdgmime_typefind, NULL, NULL, NULL, NULL);
6303 #endif
6304 GST_TYPE_FIND_REGISTER_DEFINE (degas, "image/x-degas", GST_RANK_MARGINAL,
6305     degas_type_find, NULL, NULL, NULL, NULL);
/* dvdiso is registered under the generic name "application/octet-stream". */
6306 GST_TYPE_FIND_REGISTER_DEFINE (dvdiso, "application/octet-stream",
6307     GST_RANK_MARGINAL, dvdiso_type_find, NULL, NULL, NULL, NULL);
/* Typefinder registrations: ssa, pva, aa, tap.
 *
 * Ranks used: GST_RANK_SECONDARY (ssa, pva), GST_RANK_MARGINAL (aa) and
 * GST_RANK_PRIMARY (tap). */

/* ssa lists the "ssa" and "ass" extensions but passes NULL where the other
 * entries in this group give a *_CAPS identifier. */
6308 GST_TYPE_FIND_REGISTER_DEFINE (ssa, "application/x-ssa", GST_RANK_SECONDARY,
6309     ssa_type_find, "ssa,ass", NULL, NULL, NULL);
6310 GST_TYPE_FIND_REGISTER_DEFINE (pva, "video/x-pva", GST_RANK_SECONDARY,
6311     pva_type_find, "pva", PVA_CAPS, NULL, NULL);
6312 GST_TYPE_FIND_REGISTER_DEFINE (aa, "audio/audible", GST_RANK_MARGINAL,
6313     aa_type_find, "aa,aax", AA_CAPS, NULL, NULL);
6314 GST_TYPE_FIND_REGISTER_DEFINE (tap, "audio/x-tap-tap", GST_RANK_PRIMARY,
6315     tap_type_find, "tap", TAP_CAPS, NULL, NULL);
6316