1 /* GStreamer EBML I/O
2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
3 *
4 * ebml-read.c: read EBML data from file/stream
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 */
21
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <string.h>
27
28 #include "ebml-read.h"
29 #include "ebml-ids.h"
30
31 #include <gst/math-compat.h>
32
33 GST_DEBUG_CATEGORY (ebmlread_debug);
34 #define GST_CAT_DEFAULT ebmlread_debug
35
36 /* Peeks following element id and element length in datastream provided
37 * by @peek with @ctx as user data.
38 * Returns GST_FLOW_EOS if not enough data to read id and length.
39 * Otherwise, @needed provides the prefix length (id + length), and
40 * @length provides element length.
41 *
42 * @object and @offset are provided for informative messaging/debug purposes.
43 */
44 GstFlowReturn
gst_ebml_peek_id_length(guint32 * _id,guint64 * _length,guint * _needed,GstPeekData peek,gpointer * ctx,GstElement * el,guint64 offset)45 gst_ebml_peek_id_length (guint32 * _id, guint64 * _length, guint * _needed,
46 GstPeekData peek, gpointer * ctx, GstElement * el, guint64 offset)
47 {
48 guint needed;
49 const guint8 *buf;
50 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
51 guint64 total;
52 guint8 b;
53 GstFlowReturn ret;
54
55 g_return_val_if_fail (_id != NULL, GST_FLOW_ERROR);
56 g_return_val_if_fail (_length != NULL, GST_FLOW_ERROR);
57 g_return_val_if_fail (_needed != NULL, GST_FLOW_ERROR);
58
59 /* well ... */
60 *_id = (guint32) GST_EBML_SIZE_UNKNOWN;
61 *_length = GST_EBML_SIZE_UNKNOWN;
62
63 /* read element id */
64 needed = 2;
65 ret = peek (ctx, needed, &buf);
66 if (ret != GST_FLOW_OK)
67 goto peek_error;
68 b = GST_READ_UINT8 (buf);
69 total = (guint64) b;
70 while (read <= 4 && !(total & len_mask)) {
71 read++;
72 len_mask >>= 1;
73 }
74 if (G_UNLIKELY (read > 4))
75 goto invalid_id;
76
77 /* need id and at least something for subsequent length */
78 needed = read + 1;
79 ret = peek (ctx, needed, &buf);
80 if (ret != GST_FLOW_OK)
81 goto peek_error;
82 while (n < read) {
83 b = GST_READ_UINT8 (buf + n);
84 total = (total << 8) | b;
85 ++n;
86 }
87 *_id = (guint32) total;
88
89 /* read element length */
90 b = GST_READ_UINT8 (buf + n);
91 total = (guint64) b;
92 len_mask = 0x80;
93 read = 1;
94 while (read <= 8 && !(total & len_mask)) {
95 read++;
96 len_mask >>= 1;
97 }
98 if (G_UNLIKELY (read > 8))
99 goto invalid_length;
100 if ((total &= (len_mask - 1)) == len_mask - 1)
101 num_ffs++;
102
103 needed += read - 1;
104 ret = peek (ctx, needed, &buf);
105 if (ret != GST_FLOW_OK)
106 goto peek_error;
107 buf += (needed - read);
108 n = 1;
109 while (n < read) {
110 guint8 b = GST_READ_UINT8 (buf + n);
111
112 if (G_UNLIKELY (b == 0xff))
113 num_ffs++;
114 total = (total << 8) | b;
115 ++n;
116 }
117
118 if (G_UNLIKELY (read == num_ffs))
119 *_length = G_MAXUINT64;
120 else
121 *_length = total;
122
123 *_needed = needed;
124
125 return GST_FLOW_OK;
126
127 /* ERRORS */
128 peek_error:
129 {
130 if (ret != GST_FLOW_FLUSHING && ret != GST_FLOW_EOS)
131 GST_WARNING_OBJECT (el, "peek failed, ret = %s", gst_flow_get_name (ret));
132 else
133 GST_DEBUG_OBJECT (el, "peek failed, ret = %s", gst_flow_get_name (ret));
134 *_needed = needed;
135 return ret;
136 }
137 invalid_id:
138 {
139 GST_ERROR_OBJECT (el,
140 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
141 G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
142 return GST_FLOW_ERROR;
143 }
144 invalid_length:
145 {
146 GST_ERROR_OBJECT (el,
147 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
148 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
149 return GST_FLOW_ERROR;
150 }
151 }
152
153 /* setup for parsing @buf at position @offset on behalf of @el.
154 * Takes ownership of @buf. */
155 void
gst_ebml_read_init(GstEbmlRead * ebml,GstElement * el,GstBuffer * buf,guint64 offset)156 gst_ebml_read_init (GstEbmlRead * ebml, GstElement * el, GstBuffer * buf,
157 guint64 offset)
158 {
159 GstEbmlMaster m;
160
161 g_return_if_fail (el);
162 g_return_if_fail (buf);
163
164 ebml->el = el;
165 ebml->offset = offset;
166 ebml->buf = buf;
167 gst_buffer_map (buf, &ebml->map, GST_MAP_READ);
168 ebml->readers = g_array_sized_new (FALSE, FALSE, sizeof (GstEbmlMaster), 10);
169 m.offset = ebml->offset;
170 gst_byte_reader_init (&m.br, ebml->map.data, ebml->map.size);
171 g_array_append_val (ebml->readers, m);
172 }
173
174 void
gst_ebml_read_clear(GstEbmlRead * ebml)175 gst_ebml_read_clear (GstEbmlRead * ebml)
176 {
177 if (ebml->readers)
178 g_array_unref (ebml->readers);
179 ebml->readers = NULL;
180 if (ebml->buf) {
181 gst_buffer_unmap (ebml->buf, &ebml->map);
182 gst_buffer_unref (ebml->buf);
183 }
184 ebml->buf = NULL;
185 ebml->el = NULL;
186 }
187
188 static GstFlowReturn
gst_ebml_read_peek(GstByteReader * br,guint peek,const guint8 ** data)189 gst_ebml_read_peek (GstByteReader * br, guint peek, const guint8 ** data)
190 {
191 if (G_LIKELY (gst_byte_reader_peek_data (br, peek, data)))
192 return GST_FLOW_OK;
193 else
194 return GST_FLOW_EOS;
195 }
196
197 static GstFlowReturn
gst_ebml_peek_id_full(GstEbmlRead * ebml,guint32 * id,guint64 * length,guint * prefix)198 gst_ebml_peek_id_full (GstEbmlRead * ebml, guint32 * id, guint64 * length,
199 guint * prefix)
200 {
201 GstFlowReturn ret;
202
203 ret = gst_ebml_peek_id_length (id, length, prefix,
204 (GstPeekData) gst_ebml_read_peek, (gpointer) gst_ebml_read_br (ebml),
205 ebml->el, gst_ebml_read_get_pos (ebml));
206 if (ret != GST_FLOW_OK)
207 return ret;
208
209 GST_LOG_OBJECT (ebml->el, "id 0x%x at offset 0x%" G_GINT64_MODIFIER "x"
210 " of length %" G_GUINT64_FORMAT ", prefix %d", *id,
211 gst_ebml_read_get_pos (ebml), *length, *prefix);
212
213 #ifndef GST_DISABLE_GST_DEBUG
214 if (ebmlread_debug->threshold >= GST_LEVEL_LOG) {
215 const guint8 *data = NULL;
216 GstByteReader *br = gst_ebml_read_br (ebml);
217 guint size = gst_byte_reader_get_remaining (br);
218
219 if (gst_byte_reader_peek_data (br, size, &data)) {
220
221 GST_LOG_OBJECT (ebml->el, "current br %p; remaining %d", br, size);
222 if (data)
223 GST_MEMDUMP_OBJECT (ebml->el, "element", data, MIN (size, *length));
224 }
225 }
226 #endif
227
228 return ret;
229 }
230
231 GstFlowReturn
gst_ebml_peek_id(GstEbmlRead * ebml,guint32 * id)232 gst_ebml_peek_id (GstEbmlRead * ebml, guint32 * id)
233 {
234 guint64 length;
235 guint needed;
236
237 return gst_ebml_peek_id_full (ebml, id, &length, &needed);
238 }
239
240 /*
241 * Read the next element, the contents are supposed to be sub-elements which
242 * can be read separately. A new bytereader is setup for doing so.
243 */
244 GstFlowReturn
gst_ebml_read_master(GstEbmlRead * ebml,guint32 * id)245 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
246 {
247 guint64 length;
248 guint prefix;
249 const guint8 *data = NULL;
250 GstFlowReturn ret;
251 GstEbmlMaster m;
252
253 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
254 if (ret != GST_FLOW_OK)
255 return ret;
256
257 /* we just at least peeked the id */
258 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
259 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
260
261 m.offset = gst_ebml_read_get_pos (ebml);
262 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, &data))
263 return GST_FLOW_PARSE;
264
265 GST_LOG_OBJECT (ebml->el, "pushing level %d at offset %" G_GUINT64_FORMAT,
266 ebml->readers->len, m.offset);
267 gst_byte_reader_init (&m.br, data, length);
268 g_array_append_val (ebml->readers, m);
269
270 return GST_FLOW_OK;
271 }
272
273 /* explicitly pop a bytereader from stack. Usually invoked automagically. */
274 GstFlowReturn
gst_ebml_read_pop_master(GstEbmlRead * ebml)275 gst_ebml_read_pop_master (GstEbmlRead * ebml)
276 {
277 g_return_val_if_fail (ebml->readers, GST_FLOW_ERROR);
278
279 /* never remove initial bytereader */
280 if (ebml->readers->len > 1) {
281 GST_LOG_OBJECT (ebml->el, "popping level %d", ebml->readers->len - 1);
282 g_array_remove_index (ebml->readers, ebml->readers->len - 1);
283 }
284
285 return GST_FLOW_OK;
286 }
287
288 /*
289 * Skip the next element.
290 */
291
292 GstFlowReturn
gst_ebml_read_skip(GstEbmlRead * ebml)293 gst_ebml_read_skip (GstEbmlRead * ebml)
294 {
295 guint64 length;
296 guint32 id;
297 guint prefix;
298 GstFlowReturn ret;
299
300 ret = gst_ebml_peek_id_full (ebml, &id, &length, &prefix);
301 if (ret != GST_FLOW_OK)
302 return ret;
303
304 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), length + prefix))
305 return GST_FLOW_PARSE;
306
307 return ret;
308 }
309
310 /*
311 * Read the next element as a GstBuffer (binary).
312 */
313
314 GstFlowReturn
gst_ebml_read_buffer(GstEbmlRead * ebml,guint32 * id,GstBuffer ** buf)315 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
316 {
317 guint64 length;
318 guint prefix;
319 GstFlowReturn ret;
320
321 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
322 if (ret != GST_FLOW_OK)
323 return ret;
324
325 /* we just at least peeked the id */
326 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
327 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
328
329 if (G_LIKELY (length > 0)) {
330 guint offset;
331
332 offset = gst_ebml_read_get_pos (ebml) - ebml->offset;
333 if (G_LIKELY (gst_byte_reader_skip (gst_ebml_read_br (ebml), length))) {
334 *buf = gst_buffer_copy_region (ebml->buf, GST_BUFFER_COPY_ALL,
335 offset, length);
336 } else {
337 *buf = NULL;
338 return GST_FLOW_PARSE;
339 }
340 } else {
341 *buf = gst_buffer_new ();
342 }
343
344 return ret;
345 }
346
347 /*
348 * Read the next element, return a pointer to it and its size.
349 */
350
351 static GstFlowReturn
gst_ebml_read_bytes(GstEbmlRead * ebml,guint32 * id,const guint8 ** data,guint * size)352 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, const guint8 ** data,
353 guint * size)
354 {
355 guint64 length;
356 guint prefix;
357 GstFlowReturn ret;
358
359 *size = 0;
360
361 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
362 if (ret != GST_FLOW_OK)
363 return ret;
364
365 /* we just at least peeked the id */
366 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
367 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
368
369 /* This shouldn't happen here with the elements read through this function */
370 if (length == GST_EBML_SIZE_UNKNOWN || length == G_MAXUINT64) {
371 GST_ERROR_OBJECT (ebml->el, "element 0x%x has undefined length!", *id);
372 return GST_FLOW_ERROR;
373 }
374
375 /* Sanity check since we're downcasting a 64-bit len to possibly 32-bit here */
376 if (length >= G_MAXUINT) {
377 GST_ERROR_OBJECT (ebml->el, "element 0x%x too large, "
378 "size %" G_GUINT64_FORMAT, *id, length);
379 return GST_FLOW_ERROR;
380 }
381
382 *data = NULL;
383 if (G_LIKELY (length > 0)) {
384 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, data))
385 return GST_FLOW_PARSE;
386 }
387
388 *size = length;
389
390 return ret;
391 }
392
393 /*
394 * Read the next element as an unsigned int.
395 */
396
397 GstFlowReturn
gst_ebml_read_uint(GstEbmlRead * ebml,guint32 * id,guint64 * num)398 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
399 {
400 const guint8 *data;
401 guint size;
402 GstFlowReturn ret;
403
404 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
405 if (ret != GST_FLOW_OK)
406 return ret;
407
408 if (size > 8) {
409 GST_ERROR_OBJECT (ebml->el,
410 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
411 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
412 gst_ebml_read_get_pos (ebml) - size);
413 return GST_FLOW_ERROR;
414 }
415
416 if (size == 0) {
417 *num = 0;
418 return ret;
419 }
420
421 *num = 0;
422 while (size > 0) {
423 *num = (*num << 8) | *data;
424 size--;
425 data++;
426 }
427
428 return ret;
429 }
430
431 /*
432 * Read the next element as a signed int.
433 */
434
435 GstFlowReturn
gst_ebml_read_sint(GstEbmlRead * ebml,guint32 * id,gint64 * num)436 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
437 {
438 const guint8 *data;
439 guint size;
440 gboolean negative = 0;
441 GstFlowReturn ret;
442
443 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
444 if (ret != GST_FLOW_OK)
445 return ret;
446
447 if (size > 8) {
448 GST_ERROR_OBJECT (ebml->el,
449 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
450 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
451 gst_ebml_read_get_pos (ebml) - size);
452 return GST_FLOW_ERROR;
453 }
454
455 if (size == 0) {
456 *num = 0;
457 return ret;
458 }
459
460 *num = 0;
461 if (*data & 0x80) {
462 negative = 1;
463 *num = *data & ~0x80;
464 size--;
465 data++;
466 }
467
468 while (size > 0) {
469 *num = (*num << 8) | *data;
470 size--;
471 data++;
472 }
473
474 /* make signed */
475 if (negative) {
476 *num = 0 - *num;
477 }
478
479 return ret;
480 }
481
482 /* Convert 80 bit extended precision float in big endian format to double.
483 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
484 * licensed under LGPL */
485
486 struct _ext_float
487 {
488 guint8 exponent[2];
489 guint8 mantissa[8];
490 };
491
492 static gdouble
_ext2dbl(const guint8 * data)493 _ext2dbl (const guint8 * data)
494 {
495 struct _ext_float ext;
496 guint64 m = 0;
497 gint e, i;
498
499 memcpy (&ext.exponent, data, 2);
500 memcpy (&ext.mantissa, data + 2, 8);
501
502 for (i = 0; i < 8; i++)
503 m = (m << 8) + ext.mantissa[i];
504 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
505 if (e == 0x7fff && m)
506 return NAN;
507 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
508 * mantissa bit is written as opposed to the
509 * single and double precision formats */
510 if (ext.exponent[0] & 0x80)
511 m = -m;
512 return ldexp (m, e);
513 }
514
515 /*
516 * Read the next element as a float.
517 */
518
519 GstFlowReturn
gst_ebml_read_float(GstEbmlRead * ebml,guint32 * id,gdouble * num)520 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
521 {
522 const guint8 *data;
523 guint size;
524 GstFlowReturn ret;
525
526 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
527 if (ret != GST_FLOW_OK)
528 return ret;
529
530 if (size != 0 && size != 4 && size != 8 && size != 10) {
531 GST_ERROR_OBJECT (ebml->el,
532 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
533 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
534 gst_ebml_read_get_pos (ebml) - size);
535 return GST_FLOW_ERROR;
536 }
537
538 if (size == 4) {
539 gfloat f;
540
541 memcpy (&f, data, 4);
542 f = GFLOAT_FROM_BE (f);
543
544 *num = f;
545 } else if (size == 8) {
546 gdouble d;
547
548 memcpy (&d, data, 8);
549 d = GDOUBLE_FROM_BE (d);
550
551 *num = d;
552 } else if (size == 10) {
553 *num = _ext2dbl (data);
554 } else {
555 /* size == 0 means a value of 0.0 */
556 *num = 0.0;
557 }
558
559 return ret;
560 }
561
562 /*
563 * Read the next element as a C string.
564 */
565
566 static GstFlowReturn
gst_ebml_read_string(GstEbmlRead * ebml,guint32 * id,gchar ** str)567 gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
568 {
569 const guint8 *data;
570 guint size;
571 GstFlowReturn ret;
572
573 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
574 if (ret != GST_FLOW_OK)
575 return ret;
576
577 *str = g_malloc (size + 1);
578 memcpy (*str, data, size);
579 (*str)[size] = '\0';
580
581 return ret;
582 }
583
584 /*
585 * Read the next element as an ASCII string.
586 */
587
588 GstFlowReturn
gst_ebml_read_ascii(GstEbmlRead * ebml,guint32 * id,gchar ** str_out)589 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
590 {
591 GstFlowReturn ret;
592 gchar *str;
593 gchar *iter;
594
595 #ifndef GST_DISABLE_GST_DEBUG
596 guint64 oldoff = ebml->offset;
597 #endif
598
599 ret = gst_ebml_read_string (ebml, id, &str);
600 if (ret != GST_FLOW_OK)
601 return ret;
602
603 for (iter = str; *iter != '\0'; iter++) {
604 if (G_UNLIKELY (*iter & 0x80)) {
605 GST_ERROR_OBJECT (ebml,
606 "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
607 g_free (str);
608 return GST_FLOW_ERROR;
609 }
610 }
611
612 *str_out = str;
613 return ret;
614 }
615
616 /*
617 * Read the next element as a UTF-8 string.
618 */
619
620 GstFlowReturn
gst_ebml_read_utf8(GstEbmlRead * ebml,guint32 * id,gchar ** str)621 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
622 {
623 GstFlowReturn ret;
624
625 #ifndef GST_DISABLE_GST_DEBUG
626 guint64 oldoff = gst_ebml_read_get_pos (ebml);
627 #endif
628
629 ret = gst_ebml_read_string (ebml, id, str);
630 if (ret != GST_FLOW_OK)
631 return ret;
632
633 if (str != NULL && *str != NULL && **str != '\0' &&
634 !g_utf8_validate (*str, -1, NULL)) {
635 GST_WARNING_OBJECT (ebml->el,
636 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
637 }
638
639 return ret;
640 }
641
642 /*
643 * Read the next element as a date.
644 * Returns the nanoseconds since the unix epoch.
645 */
646
647 GstFlowReturn
gst_ebml_read_date(GstEbmlRead * ebml,guint32 * id,gint64 * date)648 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
649 {
650 gint64 ebml_date;
651 GstFlowReturn ret;
652
653 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
654 if (ret != GST_FLOW_OK)
655 return ret;
656
657 *date = ebml_date + GST_EBML_DATE_OFFSET;
658
659 return ret;
660 }
661
662 /*
663 * Read the next element as binary data.
664 */
665
666 GstFlowReturn
gst_ebml_read_binary(GstEbmlRead * ebml,guint32 * id,guint8 ** binary,guint64 * length)667 gst_ebml_read_binary (GstEbmlRead * ebml,
668 guint32 * id, guint8 ** binary, guint64 * length)
669 {
670 const guint8 *data;
671 guint size;
672 GstFlowReturn ret;
673
674 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
675 if (ret != GST_FLOW_OK)
676 return ret;
677
678 *length = size;
679 *binary = g_memdup2 (data, size);
680
681 return GST_FLOW_OK;
682 }
683