• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: t; c-basic-offset: 2 -*- */
2 /* Copyright 2006-2008 Tim-Philipp Müller <tim centricular net>
3  * Copyright 2005 Jan Schmidt <thaytan@mad.scientist.com>
4  * Copyright 2002,2003 Scott Wheeler <wheeler@kde.org> (portions from taglib)
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public
17  * License along with this library; if not, write to the
18  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25 
26 #include <string.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <gst/tag/tag.h>
30 #include <gst/base/gsttypefindhelper.h>
31 
32 #ifdef HAVE_ZLIB
33 #include <zlib.h>
34 #endif
35 
36 #include "id3v2.h"
37 
38 #ifndef GST_DISABLE_GST_DEBUG
39 #define GST_CAT_DEFAULT id3v2_ensure_debug_category()
40 #endif
41 
42 static gboolean parse_comment_frame (ID3TagsWorking * work);
43 static gchar *parse_url_link_frame (ID3TagsWorking * work,
44     const gchar ** tag_name);
45 static GArray *parse_text_identification_frame (ID3TagsWorking * work);
46 static gchar *parse_user_text_identification_frame (ID3TagsWorking * work,
47     const gchar ** tag_name);
48 static gchar *parse_unique_file_identifier (ID3TagsWorking * work,
49     const gchar ** tag_name);
50 static gboolean parse_relative_volume_adjustment_two (ID3TagsWorking * work);
51 static void parse_obsolete_tdat_frame (ID3TagsWorking * work);
52 static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
53     const gchar * tag_name, const gchar * tag_str);
54 /* Parse a single string into an array of gchar* */
55 static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
56     GArray ** out_fields);
57 static void free_tag_strings (GArray * fields);
58 static gboolean
59 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
60     GArray * tag_fields);
61 static gboolean parse_picture_frame (ID3TagsWorking * work);
62 static gboolean parse_private_frame_data (ID3TagsWorking * work);
63 
64 #define ID3V2_ENCODING_ISO8859 0x00
65 #define ID3V2_ENCODING_UTF16   0x01
66 #define ID3V2_ENCODING_UTF16BE 0x02
67 #define ID3V2_ENCODING_UTF8    0x03
68 
69 gboolean
id3v2_parse_frame(ID3TagsWorking * work)70 id3v2_parse_frame (ID3TagsWorking * work)
71 {
72   const gchar *tag_name;
73   gboolean result = FALSE;
74   gint i;
75   guint8 *frame_data = work->hdr.frame_data;
76   guint frame_data_size = work->cur_frame_size;
77   gchar *tag_str = NULL;
78   GArray *tag_fields = NULL;
79   guint8 *uu_data = NULL;
80 
81 #ifdef HAVE_ZLIB
82   guint8 *uncompressed_data = NULL;
83 #endif
84 
85   /* Check that the frame id is valid */
86   for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
87     if (!g_ascii_isalnum (work->frame_id[i])) {
88       GST_DEBUG ("Encountered invalid frame_id");
89       return FALSE;
90     }
91   }
92 
93   /* Can't handle encrypted frames right now (in case we ever do, we'll have
94    * to do the decryption after the un-unsynchronisation and decompression,
95    * not here) */
96   if (work->frame_flags & ID3V2_FRAME_FORMAT_ENCRYPTION) {
97     GST_WARNING ("Encrypted frames are not supported");
98     return FALSE;
99   }
100 
101   tag_name = gst_tag_from_id3_tag (work->frame_id);
102   if (tag_name == NULL &&
103       strncmp (work->frame_id, "RVA2", 4) != 0 &&
104       strncmp (work->frame_id, "TXXX", 4) != 0 &&
105       strncmp (work->frame_id, "TDAT", 4) != 0 &&
106       strncmp (work->frame_id, "UFID", 4) != 0) {
107     return FALSE;
108   }
109 
110   if (work->frame_flags & (ID3V2_FRAME_FORMAT_COMPRESSION |
111           ID3V2_FRAME_FORMAT_DATA_LENGTH_INDICATOR)) {
112 #ifdef OHOS_OPT_CVE
113     if (frame_data_size <= 4)
114 #else
115     if (work->hdr.frame_data_size <= 4)
116 #endif
117       return FALSE;
118     if (ID3V2_VER_MAJOR (work->hdr.version) == 3) {
119       work->parse_size = GST_READ_UINT32_BE (frame_data);
120     } else {
121       work->parse_size = id3v2_read_synch_uint (frame_data, 4);
122     }
123     frame_data += 4;
124     frame_data_size -= 4;
125     GST_LOG ("Un-unsynced data size %d (of %d)", work->parse_size,
126         frame_data_size);
127     if (work->parse_size > frame_data_size) {
128       GST_WARNING ("ID3v2 frame %s data has invalid size %d (>%d)",
129           work->frame_id, work->parse_size, frame_data_size);
130       return FALSE;
131     }
132   }
133 
134   /* in v2.3 the frame sizes are not syncsafe, so the entire tag had to be
135    * unsynced. In v2.4 the frame sizes are syncsafe so it's just the frame
136    * data that needs un-unsyncing, but not the frame headers. */
137   if (ID3V2_VER_MAJOR (work->hdr.version) == 4) {
138     if ((work->hdr.flags & ID3V2_HDR_FLAG_UNSYNC) != 0 ||
139         ((work->frame_flags & ID3V2_FRAME_FORMAT_UNSYNCHRONISATION) != 0)) {
140       GST_DEBUG ("Un-unsyncing frame %s", work->frame_id);
141       uu_data = id3v2_ununsync_data (frame_data, &frame_data_size);
142       frame_data = uu_data;
143       GST_MEMDUMP ("ID3v2 frame (un-unsyced)", frame_data, frame_data_size);
144     }
145   }
146 
147   work->parse_size = frame_data_size;
148 
149   if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
150 #ifdef HAVE_ZLIB
151     uLongf destSize = work->parse_size;
152     Bytef *dest, *src;
153 
154     uncompressed_data = g_malloc (work->parse_size);
155 
156     dest = (Bytef *) uncompressed_data;
157     src = (Bytef *) frame_data;
158 
159     if (uncompress (dest, &destSize, src, frame_data_size) != Z_OK) {
160       g_free (uncompressed_data);
161       g_free (uu_data);
162       return FALSE;
163     }
164     if (destSize != work->parse_size) {
165       GST_WARNING
166           ("Decompressing ID3v2 frame %s did not produce expected size %d bytes (got %lu)",
167           tag_name, work->parse_size, destSize);
168       g_free (uncompressed_data);
169       g_free (uu_data);
170       return FALSE;
171     }
172     work->parse_data = uncompressed_data;
173 #else
174     GST_WARNING ("Compressed ID3v2 tag frame could not be decompressed, because"
175         " libgsttag-" GST_API_VERSION " was compiled without zlib support");
176     g_free (uu_data);
177     return FALSE;
178 #endif
179   } else {
180     work->parse_data = frame_data;
181   }
182 
183   if (work->frame_id[0] == 'T') {
184     if (strcmp (work->frame_id, "TDAT") == 0) {
185       parse_obsolete_tdat_frame (work);
186       result = TRUE;
187     } else if (strcmp (work->frame_id, "TXXX") == 0) {
188       /* Handle user text frame */
189       tag_str = parse_user_text_identification_frame (work, &tag_name);
190     } else {
191       /* Text identification frame */
192       tag_fields = parse_text_identification_frame (work);
193     }
194   } else if (work->frame_id[0] == 'W' && strcmp (work->frame_id, "WXXX") != 0) {
195     /* URL link frame: ISO-8859-1 encoded, one frame per tag */
196     tag_str = parse_url_link_frame (work, &tag_name);
197   } else if (!strcmp (work->frame_id, "COMM")) {
198     /* Comment */
199     result = parse_comment_frame (work);
200   } else if (!strcmp (work->frame_id, "APIC")) {
201     /* Attached picture */
202     result = parse_picture_frame (work);
203   } else if (!strcmp (work->frame_id, "RVA2")) {
204     /* Relative volume */
205     result = parse_relative_volume_adjustment_two (work);
206   } else if (!strcmp (work->frame_id, "UFID")) {
207     /* Unique file identifier */
208     tag_str = parse_unique_file_identifier (work, &tag_name);
209   } else if (!strcmp (work->frame_id, "PRIV")) {
210     /* private frame */
211     result = parse_private_frame_data (work);
212   }
213 #ifdef HAVE_ZLIB
214   if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
215     g_free (uncompressed_data);
216     uncompressed_data = NULL;
217     work->parse_data = frame_data;
218   }
219 #endif
220 
221   if (tag_str != NULL) {
222     /* g_print ("Tag %s value %s\n", tag_name, tag_str); */
223     result = id3v2_tag_to_taglist (work, tag_name, tag_str);
224     g_free (tag_str);
225   }
226   if (tag_fields != NULL) {
227     if (strcmp (work->frame_id, "TCON") == 0) {
228       /* Genre strings need special treatment */
229       result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
230     } else {
231       gint t;
232 
233       for (t = 0; t < tag_fields->len; t++) {
234         tag_str = g_array_index (tag_fields, gchar *, t);
235         if (tag_str != NULL && tag_str[0] != '\0')
236           result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
237       }
238     }
239     free_tag_strings (tag_fields);
240   }
241 
242   g_free (uu_data);
243 
244   return result;
245 }
246 
247 static gboolean
parse_comment_frame(ID3TagsWorking * work)248 parse_comment_frame (ID3TagsWorking * work)
249 {
250   guint dummy;
251   guint8 encoding;
252   gchar language[4];
253   GArray *fields = NULL;
254   gchar *description, *text;
255 
256   if (work->parse_size < 6)
257     return FALSE;
258 
259   encoding = work->parse_data[0];
260   language[0] = g_ascii_tolower (work->parse_data[1]);
261   language[1] = g_ascii_tolower (work->parse_data[2]);
262   language[2] = g_ascii_tolower (work->parse_data[3]);
263   language[3] = '\0';
264 
265   parse_split_strings (encoding, (gchar *) work->parse_data + 4,
266       work->parse_size - 4, &fields);
267 
268   if (fields == NULL || fields->len < 2) {
269     GST_WARNING ("Failed to decode comment frame");
270     goto fail;
271   }
272   description = g_array_index (fields, gchar *, 0);
273   text = g_array_index (fields, gchar *, 1);
274 
275   if (!g_utf8_validate (text, -1, NULL)) {
276     GST_WARNING ("Converted string is not valid utf-8");
277     goto fail;
278   }
279 
280   /* skip our own dummy descriptions (from id3v2mux) */
281   if (strlen (description) > 0 && g_utf8_validate (description, -1, NULL) &&
282       sscanf (description, "c%u", &dummy) != 1) {
283     gchar *s;
284 
285     /* must be either an ISO-639-1 or ISO-639-2 language code */
286     if (language[0] != '\0' &&
287         g_ascii_isalpha (language[0]) &&
288         g_ascii_isalpha (language[1]) &&
289         (g_ascii_isalpha (language[2]) || language[2] == '\0')) {
290       const gchar *lang_code;
291 
292       /* prefer two-letter ISO 639-1 code if we have a mapping */
293       lang_code = gst_tag_get_language_code (language);
294       s = g_strdup_printf ("%s[%s]=%s", description,
295           (lang_code) ? lang_code : language, text);
296     } else {
297       s = g_strdup_printf ("%s=%s", description, text);
298     }
299     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
300         GST_TAG_EXTENDED_COMMENT, s, NULL);
301     g_free (s);
302   } else if (text != NULL && *text != '\0') {
303     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
304         GST_TAG_COMMENT, text, NULL);
305   } else {
306     goto fail;
307   }
308 
309   free_tag_strings (fields);
310   return TRUE;
311 
312 fail:
313   {
314     GST_WARNING ("failed to parse COMM frame");
315     free_tag_strings (fields);
316     return FALSE;
317   }
318 }
319 
320 static GArray *
parse_text_identification_frame(ID3TagsWorking * work)321 parse_text_identification_frame (ID3TagsWorking * work)
322 {
323   guchar encoding;
324   GArray *fields = NULL;
325 
326   if (work->parse_size < 2)
327     return NULL;
328 
329   encoding = work->parse_data[0];
330   parse_split_strings (encoding, (gchar *) work->parse_data + 1,
331       work->parse_size - 1, &fields);
332   if (fields) {
333     if (fields->len > 0) {
334       GST_LOG ("Read %d fields from Text ID frame of size %d with encoding %d"
335           ". First is '%s'", fields->len, work->parse_size - 1, encoding,
336           g_array_index (fields, gchar *, 0));
337     } else {
338       GST_LOG ("Read 0 fields from Text ID frame of size %d with encoding %d",
339           work->parse_size - 1, encoding);
340     }
341   }
342 
343   return fields;
344 }
345 
346 static gboolean
link_is_known_license(const gchar * url)347 link_is_known_license (const gchar * url)
348 {
349   return g_str_has_prefix (url, "http://creativecommons.org/licenses/");
350 }
351 
352 static gchar *
parse_url_link_frame(ID3TagsWorking * work,const gchar ** tag_name)353 parse_url_link_frame (ID3TagsWorking * work, const gchar ** tag_name)
354 {
355   gsize len;
356   gchar *nul, *data, *link;
357 
358   *tag_name = NULL;
359 
360   if (work->parse_size == 0)
361     return NULL;
362 
363   data = (gchar *) work->parse_data;
364   /* if there's more data then the string is long, we only want to parse the
365    * data up to the terminating zero to g_convert and ignore the rest, as
366    * per spec */
367   nul = memchr (data, '\0', work->parse_size);
368   if (nul != NULL) {
369     len = (gsize) (nul - data);
370   } else {
371     len = work->parse_size;
372   }
373 
374   link = g_convert (data, len, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
375 
376   if (link == NULL || !gst_uri_is_valid (link)) {
377     GST_DEBUG ("Invalid URI in %s frame: %s", work->frame_id,
378         GST_STR_NULL (link));
379     g_free (link);
380     return NULL;
381   }
382 
383   /* we don't know if it's a link to a page that explains the copyright
384    * situation, or a link that points to/represents a license, the ID3 spec
385    * does not separate those two things; for now only put known license URIs
386    * into GST_TAG_LICENSE_URI and everything else into GST_TAG_COPYRIGHT_URI */
387   if (strcmp (work->frame_id, "WCOP") == 0) {
388     if (link_is_known_license (link))
389       *tag_name = GST_TAG_LICENSE_URI;
390     else
391       *tag_name = GST_TAG_COPYRIGHT_URI;
392   } else if (strcmp (work->frame_id, "WOAF") == 0) {
393     /* can't be bothered to create a CONTACT_URI tag for this, so let's just
394      * put into into GST_TAG_CONTACT, which is where it ends up when reading
395      * the info from vorbis comments as well */
396     *tag_name = GST_TAG_CONTACT;
397   }
398 
399   return link;
400 }
401 
402 
403 static gchar *
parse_user_text_identification_frame(ID3TagsWorking * work,const gchar ** tag_name)404 parse_user_text_identification_frame (ID3TagsWorking * work,
405     const gchar ** tag_name)
406 {
407   gchar *ret;
408   guchar encoding;
409   GArray *fields = NULL;
410 
411   *tag_name = NULL;
412 
413   if (work->parse_size < 2)
414     return NULL;
415 
416   encoding = work->parse_data[0];
417 
418   parse_split_strings (encoding, (gchar *) work->parse_data + 1,
419       work->parse_size - 1, &fields);
420 
421   if (fields == NULL)
422     return NULL;
423 
424   if (fields->len != 2) {
425     GST_WARNING ("Expected 2 fields in TXXX frame, but got %d", fields->len);
426     free_tag_strings (fields);
427     return NULL;
428   }
429 
430   *tag_name =
431       gst_tag_from_id3_user_tag ("TXXX", g_array_index (fields, gchar *, 0));
432 
433   GST_LOG ("TXXX frame of size %d. Mapped descriptor '%s' to GStreamer tag %s",
434       work->parse_size - 1, g_array_index (fields, gchar *, 0),
435       GST_STR_NULL (*tag_name));
436 
437   if (*tag_name) {
438     ret = g_strdup (g_array_index (fields, gchar *, 1));
439     /* GST_LOG ("%s = %s", *tag_name, GST_STR_NULL (ret)); */
440   } else {
441     ret = NULL;
442   }
443 
444   free_tag_strings (fields);
445   return ret;
446 }
447 
448 static gboolean
parse_id_string(ID3TagsWorking * work,gchar ** p_str,gint * p_len,gint * p_datalen)449 parse_id_string (ID3TagsWorking * work, gchar ** p_str, gint * p_len,
450     gint * p_datalen)
451 {
452   gint len, datalen;
453 
454   if (work->parse_size < 2)
455     return FALSE;
456 
457   for (len = 0; len < work->parse_size - 1; ++len) {
458     if (work->parse_data[len] == '\0')
459       break;
460   }
461 
462   datalen = work->parse_size - (len + 1);
463   if (len == 0 || datalen <= 0)
464     return FALSE;
465 
466   *p_str = g_strndup ((gchar *) work->parse_data, len);
467   *p_len = len;
468   *p_datalen = datalen;
469 
470   return TRUE;
471 }
472 
473 static gboolean
parse_private_frame_data(ID3TagsWorking * work)474 parse_private_frame_data (ID3TagsWorking * work)
475 {
476   GstBuffer *binary_data = NULL;
477   GstStructure *owner_info = NULL;
478   guint8 *owner_str = NULL;
479   gsize owner_len;
480   GstSample *priv_frame = NULL;
481 
482   if (work->parse_size == 0) {
483     /* private frame data not available */
484     return FALSE;
485   }
486 
487   owner_str =
488       (guint8 *) memchr ((guint8 *) work->parse_data, 0, work->parse_size);
489 
490   if (owner_str == NULL) {
491     GST_WARNING ("Invalid PRIV frame received");
492     return FALSE;
493   }
494 
495   owner_len = (gsize) (owner_str - work->parse_data) + 1;
496 
497   owner_info =
498       gst_structure_new ("ID3PrivateFrame", "owner", G_TYPE_STRING,
499       work->parse_data, NULL);
500 
501   binary_data = gst_buffer_new_and_alloc (work->parse_size - owner_len);
502   gst_buffer_fill (binary_data, 0, work->parse_data + owner_len,
503       work->parse_size - owner_len);
504 
505   priv_frame = gst_sample_new (binary_data, NULL, NULL, owner_info);
506 
507   gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
508       GST_TAG_PRIVATE_DATA, priv_frame, NULL);
509 
510   gst_sample_unref (priv_frame);
511   gst_buffer_unref (binary_data);
512 
513   return TRUE;
514 }
515 
516 static gchar *
parse_unique_file_identifier(ID3TagsWorking * work,const gchar ** tag_name)517 parse_unique_file_identifier (ID3TagsWorking * work, const gchar ** tag_name)
518 {
519   gint len, datalen;
520   gchar *owner_id, *data, *ret = NULL;
521 
522   GST_LOG ("parsing UFID frame of size %d", work->parse_size);
523 
524   if (!parse_id_string (work, &owner_id, &len, &datalen))
525     return NULL;
526 
527   data = (gchar *) work->parse_data + len + 1;
528   GST_LOG ("UFID owner ID: %s (+ %d bytes of data)", owner_id, datalen);
529 
530   if (strcmp (owner_id, "http://musicbrainz.org") == 0 &&
531       g_utf8_validate (data, datalen, NULL)) {
532     *tag_name = GST_TAG_MUSICBRAINZ_TRACKID;
533     ret = g_strndup (data, datalen);
534   } else {
535     GST_INFO ("Unknown UFID owner ID: %s", owner_id);
536   }
537   g_free (owner_id);
538 
539   return ret;
540 }
541 
542 /* parse data and return length of the next string in the given encoding,
543  * including the NUL terminator */
544 static gint
scan_encoded_string(guint8 encoding,gchar * data,gint data_size)545 scan_encoded_string (guint8 encoding, gchar * data, gint data_size)
546 {
547   gint i;
548 
549   switch (encoding) {
550     case ID3V2_ENCODING_ISO8859:
551     case ID3V2_ENCODING_UTF8:
552       for (i = 0; i < data_size; ++i) {
553         if (data[i] == '\0')
554           return i + 1;
555       }
556       break;
557     case ID3V2_ENCODING_UTF16:
558     case ID3V2_ENCODING_UTF16BE:
559       /* we don't care about BOMs here and treat them as part of the string */
560       /* Find '\0\0' terminator */
561       for (i = 0; i < data_size - 1; i += 2) {
562         if (data[i] == '\0' && data[i + 1] == '\0')
563           return i + 2;
564       }
565       break;
566     default:
567       break;
568   }
569 
570   return 0;
571 }
572 
573 static gboolean
parse_picture_frame(ID3TagsWorking * work)574 parse_picture_frame (ID3TagsWorking * work)
575 {
576   guint8 txt_encoding, pic_type;
577   gchar *mime_str = NULL;
578   gint len, datalen;
579 
580   GST_LOG ("APIC frame (ID3v2.%u)", ID3V2_VER_MAJOR (work->hdr.version));
581 
582   if (work->parse_size < 1 + 1 + 1 + 1 + 1)
583     goto not_enough_data;
584 
585   txt_encoding = work->parse_data[0];
586   ++work->parse_data;
587   --work->parse_size;
588 
589   /* Read image format; in early ID3v2 versions this is a fixed-length
590    * 3-character string without terminator; in later versions (>= 2.3.0)
591    * this is a NUL-terminated string of variable length */
592   if (ID3V2_VER_MAJOR (work->hdr.version) < 3) {
593     if (work->parse_size < 3)
594       goto not_enough_data;
595 
596     mime_str = g_strndup ((gchar *) work->parse_data, 3);
597     len = 3;
598   } else {
599     if (!parse_id_string (work, &mime_str, &len, &datalen))
600       return FALSE;
601     ++len;                      /* for string terminator */
602   }
603 
604   if (work->parse_size < len + 1 + 1 + 1)
605     goto not_enough_data;
606 
607   work->parse_data += len;
608   work->parse_size -= len;
609 
610   /* Read image type */
611   pic_type = work->parse_data[0];
612   ++work->parse_data;
613   --work->parse_size;
614 
615   GST_LOG ("APIC frame mime type    : %s", GST_STR_NULL (mime_str));
616   GST_LOG ("APIC frame picture type : 0x%02x", (guint) pic_type);
617 
618   if (work->parse_size < 1 + 1)
619     goto not_enough_data;
620 
621   len = scan_encoded_string (txt_encoding, (gchar *) work->parse_data,
622       work->parse_size);
623 
624   if (len < 1)
625     goto error;
626 
627   /* just skip the description string ... */
628   GST_LOG ("Skipping description string (%d bytes in original coding)", len);
629 
630   if (work->parse_size < len + 1)
631     goto not_enough_data;
632 
633   work->parse_data += len;
634   work->parse_size -= len;
635 
636   GST_DEBUG ("image data is %u bytes", work->parse_size);
637 
638   if (work->parse_size <= 0)
639     goto not_enough_data;
640 
641   if (!gst_tag_list_add_id3_image (work->tags, (guint8 *) work->parse_data,
642           work->parse_size, pic_type)) {
643     goto error;
644   }
645 
646   g_free (mime_str);
647   return TRUE;
648 
649 not_enough_data:
650   {
651     GST_DEBUG ("not enough data, skipping APIC frame");
652     /* fall through to error */
653   }
654 error:
655   {
656     GST_DEBUG ("problem parsing APIC frame, skipping");
657     g_free (mime_str);
658     return FALSE;
659   }
660 }
661 
662 #define ID3V2_RVA2_CHANNEL_MASTER  1
663 
664 static gboolean
parse_relative_volume_adjustment_two(ID3TagsWorking * work)665 parse_relative_volume_adjustment_two (ID3TagsWorking * work)
666 {
667   const gchar *gain_tag_name = NULL;
668   const gchar *peak_tag_name = NULL;
669   gdouble gain_dB, peak_val;
670   guint64 peak;
671   guint8 *data, chan, peak_bits;
672   gchar *id;
673   gint len, datalen, i;
674 
675   if (!parse_id_string (work, &id, &len, &datalen))
676     return FALSE;
677 
678   if (datalen < (1 + 2 + 1)) {
679     GST_WARNING ("broken RVA2 frame, data size only %d bytes", datalen);
680     g_free (id);
681     return FALSE;
682   }
683 
684   data = work->parse_data + len + 1;
685   chan = GST_READ_UINT8 (data);
686   gain_dB = (gdouble) ((gint16) GST_READ_UINT16_BE (data + 1)) / 512.0;
687   /* The meaning of the peak value is not defined in the ID3v2 spec. However,
688    * the first/only implementation of this seems to have been in XMMS, and
689    * other libs (like mutagen) seem to follow that implementation as well:
690    * see http://bugs.xmms.org/attachment.cgi?id=113&action=view */
691   peak_bits = GST_READ_UINT8 (data + 1 + 2);
692   if (peak_bits > 64) {
693     GST_WARNING ("silly peak precision of %d bits, ignoring", (gint) peak_bits);
694     peak_bits = 0;
695   }
696   data += 1 + 2 + 1;
697   datalen -= 1 + 2 + 1;
698   if (peak_bits == 16) {
699     peak = GST_READ_UINT16_BE (data);
700   } else {
701     peak = 0;
702     for (i = 0; i < (GST_ROUND_UP_8 (peak_bits) / 8) && datalen > 0; ++i) {
703       peak = peak << 8;
704       peak |= GST_READ_UINT8 (data);
705       ++data;
706       --datalen;
707     }
708   }
709 
710   if (peak_bits > 0) {
711     peak = peak << (64 - GST_ROUND_UP_8 (peak_bits));
712     peak_val =
713         gst_guint64_to_gdouble (peak) /
714         gst_util_guint64_to_gdouble (G_MAXINT64);
715     GST_LOG ("RVA2 frame: id=%s, chan=%u, adj=%.2fdB, peak_bits=%u, peak=%.2f",
716         id, chan, gain_dB, (guint) peak_bits, peak_val);
717   } else {
718     peak_val = 0;
719   }
720 
721   if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "track") == 0) {
722     gain_tag_name = GST_TAG_TRACK_GAIN;
723     peak_tag_name = GST_TAG_TRACK_PEAK;
724   } else if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "album") == 0) {
725     gain_tag_name = GST_TAG_ALBUM_GAIN;
726     peak_tag_name = GST_TAG_ALBUM_PEAK;
727   } else {
728     GST_INFO ("Unhandled RVA2 frame id '%s' for channel %d", id, chan);
729   }
730 
731   if (gain_tag_name) {
732     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
733         gain_tag_name, gain_dB, NULL);
734   }
735   if (peak_tag_name && peak_bits > 0) {
736     gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
737         peak_tag_name, peak_val, NULL);
738   }
739 
740   g_free (id);
741 
742   return (gain_tag_name != NULL || peak_tag_name != NULL);
743 }
744 
745 static void
parse_obsolete_tdat_frame(ID3TagsWorking * work)746 parse_obsolete_tdat_frame (ID3TagsWorking * work)
747 {
748   if (work->parse_size >= 5 &&
749       work->parse_data[0] == ID3V2_ENCODING_ISO8859 &&
750       g_ascii_isdigit (work->parse_data[1]) &&
751       g_ascii_isdigit (work->parse_data[2]) &&
752       g_ascii_isdigit (work->parse_data[3]) &&
753       g_ascii_isdigit (work->parse_data[4])) {
754 
755     guint pending_day = (10 * g_ascii_digit_value (work->parse_data[1])) +
756         g_ascii_digit_value (work->parse_data[2]);
757     guint pending_month = (10 * g_ascii_digit_value (work->parse_data[3])) +
758         g_ascii_digit_value (work->parse_data[4]);
759 
760     if (pending_day >= 1 && pending_day <= 31 && pending_month >= 1
761         && pending_month <= 12) {
762       GST_LOG ("date (dd/mm) %02u/%02u", pending_day, pending_month);
763       work->pending_day = pending_day;
764       work->pending_month = pending_month;
765     } else {
766       GST_WARNING ("Ignoring invalid ID3v2 TDAT frame (dd/mm) %02u/%02u",
767           pending_day, pending_month);
768     }
769   }
770 }
771 
772 static gboolean
id3v2_tag_to_taglist(ID3TagsWorking * work,const gchar * tag_name,const gchar * tag_str)773 id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
774     const gchar * tag_str)
775 {
776   GType tag_type = gst_tag_get_type (tag_name);
777   GstTagList *tag_list = work->tags;
778 
779   if (tag_str == NULL)
780     return FALSE;
781 
782   switch (tag_type) {
783     case G_TYPE_UINT:
784     {
785       gint current, total;
786 
787       if (sscanf (tag_str, "%d/%d", &current, &total) == 2) {
788         if (total <= 0) {
789           GST_WARNING ("Ignoring invalid value for total %d in tag %s",
790               total, tag_name);
791         } else {
792           if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
793             gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
794                 GST_TAG_TRACK_COUNT, total, NULL);
795           } else if (strcmp (tag_name, GST_TAG_ALBUM_VOLUME_NUMBER) == 0) {
796             gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
797                 GST_TAG_ALBUM_VOLUME_COUNT, total, NULL);
798           }
799         }
800       } else if (sscanf (tag_str, "%d", &current) != 1) {
801         /* Not an integer in the string */
802         GST_WARNING ("Tag string for tag %s does not contain an integer - "
803             "ignoring", tag_name);
804         break;
805       }
806 
807       if (current <= 0) {
808         GST_WARNING ("Ignoring invalid value %d in tag %s", current, tag_name);
809       } else {
810         gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, current,
811             NULL);
812       }
813       break;
814     }
815     case G_TYPE_UINT64:
816     {
817       guint64 tmp;
818 
819       g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
820       tmp = strtoul (tag_str, NULL, 10);
821       if (tmp == 0) {
822         break;
823       }
824       gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
825           GST_TAG_DURATION, tmp * 1000 * 1000, NULL);
826       break;
827     }
828     case G_TYPE_STRING:{
829       const GValue *val;
830       guint i, num;
831 
832       /* make sure we add each unique string only once per tag, we don't want
833        * to have the same genre in the genre list multiple times, for example,
834        * or the same DiscID in there twice just because it's contained in the
835        * tag multiple times under different TXXX user tags */
836       num = gst_tag_list_get_tag_size (tag_list, tag_name);
837       for (i = 0; i < num; ++i) {
838         val = gst_tag_list_get_value_index (tag_list, tag_name, i);
839         if (val != NULL && strcmp (g_value_get_string (val), tag_str) == 0)
840           break;
841       }
842       if (i == num) {
843         gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
844             tag_name, tag_str, NULL);
845       }
846       break;
847     }
848 
849     default:{
850       if (tag_type == GST_TYPE_DATE_TIME) {
851         GstDateTime *dt;
852 
853         /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy */
854         dt = gst_date_time_new_from_iso8601_string (tag_str);
855         if (dt != NULL) {
856           gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, dt, NULL);
857           gst_date_time_unref (dt);
858         } else {
859           GST_WARNING ("Could not transform '%s' into date", tag_str);
860         }
861       } else {
862         GValue src = { 0, };
863         GValue dest = { 0, };
864 
865         /* handles anything else */
866         g_value_init (&src, G_TYPE_STRING);
867         g_value_set_string (&src, (const gchar *) tag_str);
868         g_value_init (&dest, tag_type);
869 
870         if (g_value_transform (&src, &dest)) {
871           gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
872               tag_name, &dest, NULL);
873         } else if (tag_type == G_TYPE_DOUBLE) {
874           /* replaygain tags in TXXX frames ... */
875           g_value_set_double (&dest, g_strtod (tag_str, NULL));
876           gst_tag_list_add_values (tag_list, GST_TAG_MERGE_KEEP,
877               tag_name, &dest, NULL);
878           GST_LOG ("Converted string '%s' to double %f", tag_str,
879               g_value_get_double (&dest));
880         } else {
881           GST_WARNING ("Failed to transform tag from string '%s' to type '%s'",
882               tag_str, g_type_name (tag_type));
883         }
884 
885         g_value_unset (&src);
886         g_value_unset (&dest);
887       }
888       break;
889     }
890   }
891 
892   return TRUE;
893 }
894 
895 /* Check that an array of characters contains only digits */
896 static gboolean
id3v2_are_digits(const gchar * chars,gint size)897 id3v2_are_digits (const gchar * chars, gint size)
898 {
899   gint i;
900 
901   for (i = 0; i < size; i++) {
902     if (!g_ascii_isdigit (chars[i]))
903       return FALSE;
904   }
905   return TRUE;
906 }
907 
908 static gboolean
id3v2_genre_string_to_taglist(ID3TagsWorking * work,const gchar * tag_name,const gchar * tag_str,gint len)909 id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
910     const gchar * tag_str, gint len)
911 {
912   g_return_val_if_fail (tag_str != NULL, FALSE);
913 
914   /* If it's a number, it might be a defined genre */
915   if (id3v2_are_digits (tag_str, len)) {
916     tag_str = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
917     return id3v2_tag_to_taglist (work, tag_name, tag_str);
918   }
919   /* Otherwise it might be "RX" or "CR" */
920   if (len == 2) {
921     if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
922       return id3v2_tag_to_taglist (work, tag_name, "Remix");
923 
924     if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
925       return id3v2_tag_to_taglist (work, tag_name, "Cover");
926   }
927 
928   /* Otherwise it's a string */
929   return id3v2_tag_to_taglist (work, tag_name, tag_str);
930 }
931 
932 static gboolean
id3v2_genre_fields_to_taglist(ID3TagsWorking * work,const gchar * tag_name,GArray * tag_fields)933 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
934     GArray * tag_fields)
935 {
936   gchar *tag_str = NULL;
937   gboolean result = FALSE;
938   gint i;
939 
940   for (i = 0; i < tag_fields->len; i++) {
941     gint len;
942 
943     tag_str = g_array_index (tag_fields, gchar *, i);
944     if (tag_str == NULL)
945       continue;
946 
947     len = strlen (tag_str);
948     /* Only supposed to see '(n)' type numeric genre strings in ID3 <= 2.3.0
949      * but apparently we see them in 2.4.0 sometimes too */
950     if (TRUE || work->hdr.version <= 0x300) {   /* <= 2.3.0 */
951       /* Check for genre numbers wrapped in parentheses, possibly
952        * followed by a string */
953       while (len >= 2) {
954         gint pos;
955         gboolean found = FALSE;
956 
957         /* Double parenthesis ends the numeric genres, but we need
958          * to swallow the first one so we actually output '(' */
959         if (tag_str[0] == '(' && tag_str[1] == '(') {
960           tag_str++;
961           len--;
962           break;
963         }
964 
965         /* If the first char is not a parenthesis, then stop
966          * looking for parenthesised genre strings */
967         if (tag_str[0] != '(')
968           break;
969 
970         for (pos = 1; pos < len; pos++) {
971           if (tag_str[pos] == ')') {
972             gchar *tmp_str;
973 
974             tmp_str = g_strndup (tag_str + 1, pos - 1);
975             result |=
976                 id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
977                 pos - 1);
978             g_free (tmp_str);
979             tag_str += pos + 1;
980             len -= pos + 1;
981             found = TRUE;
982             break;
983           }
984 
985           /* If we encounter a non-digit while searching for a closing
986            * parenthesis, we should not try and interpret this as a
987            * numeric genre string */
988           if (!g_ascii_isdigit (tag_str[pos]))
989             break;
990         }
991         if (!found)
992           break;                /* There was no closing parenthesis */
993       }
994     }
995 
996     if (len > 0 && tag_str != NULL)
997       result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
998   }
999   return result;
1000 }
1001 
1002 static gboolean
find_utf16_bom(gchar * data,gint * p_data_endianness)1003 find_utf16_bom (gchar * data, gint * p_data_endianness)
1004 {
1005   guint16 marker = (GST_READ_UINT8 (data) << 8) | GST_READ_UINT8 (data + 1);
1006 
1007   switch (marker) {
1008     case 0xFFFE:
1009       *p_data_endianness = G_LITTLE_ENDIAN;
1010       return TRUE;
1011     case 0xFEFF:
1012       *p_data_endianness = G_BIG_ENDIAN;
1013       return TRUE;
1014     default:
1015       break;
1016   }
1017   return FALSE;
1018 }
1019 
1020 static void *
string_utf8_dup(const gchar * start,const guint size)1021 string_utf8_dup (const gchar * start, const guint size)
1022 {
1023   const gchar *env;
1024   gsize bytes_read;
1025   gchar *utf8;
1026 
1027   /* Should we try the charsets specified
1028    * via environment variables FIRST ? */
1029   if (g_utf8_validate (start, size, NULL)) {
1030     utf8 = g_strndup (start, size);
1031     goto beach;
1032   }
1033 
1034   env = g_getenv ("GST_ID3V1_TAG_ENCODING");
1035   if (!env || *env == '\0')
1036     env = g_getenv ("GST_ID3_TAG_ENCODING");
1037   if (!env || *env == '\0')
1038     env = g_getenv ("GST_TAG_ENCODING");
1039 
1040   /* Try charsets specified via the environment */
1041   if (env && *env != '\0') {
1042     gchar **c, **csets;
1043 
1044     csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
1045 
1046     for (c = csets; c && *c; ++c) {
1047       if ((utf8 =
1048               g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
1049         if (bytes_read == size) {
1050           GST_DEBUG ("Using charset %s to interpret id3 tags", *c);
1051           g_strfreev (csets);
1052           goto beach;
1053         }
1054         g_free (utf8);
1055         utf8 = NULL;
1056       }
1057     }
1058   }
1059   /* Try current locale (if not UTF-8) */
1060   if (!g_get_charset (&env)) {
1061     if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
1062       if (bytes_read == size) {
1063         goto beach;
1064       }
1065       g_free (utf8);
1066       utf8 = NULL;
1067     }
1068   }
1069 
1070   /* Try ISO-8859-1 */
1071   utf8 =
1072       g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
1073   if (utf8 != NULL && bytes_read == size) {
1074     goto beach;
1075   }
1076 
1077   g_free (utf8);
1078   return NULL;
1079 
1080 beach:
1081 
1082   g_strchomp (utf8);
1083 
1084   return (utf8);
1085 }
1086 
1087 static void
parse_insert_string_field(guint8 encoding,gchar * data,gint data_size,GArray * fields)1088 parse_insert_string_field (guint8 encoding, gchar * data, gint data_size,
1089     GArray * fields)
1090 {
1091   gchar *field = NULL;
1092 
1093   switch (encoding) {
1094     case ID3V2_ENCODING_UTF16:
1095     case ID3V2_ENCODING_UTF16BE:
1096     {
1097       gunichar2 *utf16;
1098       gint data_endianness;
1099       glong n_read = 0, size = 0;
1100       guint len, i;
1101 
1102       if (encoding == ID3V2_ENCODING_UTF16)
1103         data_endianness = G_BYTE_ORDER;
1104       else
1105         data_endianness = G_BIG_ENDIAN;
1106 
1107       /* Sometimes we see strings with multiple BOM markers at the start.
1108        * In that case, we assume the innermost one is correct. If that fails
1109        * to produce valid UTF-8, we try the other endianness anyway */
1110       while (data_size >= 2 && find_utf16_bom (data, &data_endianness)) {
1111         data += 2;              /* skip BOM */
1112         data_size -= 2;
1113       }
1114 
1115       if (data_size < 2) {
1116         field = g_strdup ("");
1117         break;
1118       }
1119 
1120       /* alloc needed to ensure correct alignment which is required by GLib */
1121       len = data_size / 2;
1122       utf16 = g_try_new (gunichar2, len + 1);
1123       if (utf16 == NULL)
1124         break;
1125 
1126       memcpy (utf16, data, 2 * len);
1127 
1128       GST_LOG ("Trying interpreting data as UTF-16-%s first",
1129           (data_endianness == G_LITTLE_ENDIAN) ? "LE" : "BE");
1130 
1131       if (data_endianness != G_BYTE_ORDER) {
1132         /* convert to native endian UTF-16 */
1133         for (i = 0; i < len; ++i)
1134           utf16[i] = GUINT16_SWAP_LE_BE (utf16[i]);
1135       }
1136 
1137       /* convert to UTF-8 */
1138       field = g_utf16_to_utf8 (utf16, len, &n_read, &size, NULL);
1139       if (field != NULL && n_read > 0 && g_utf8_validate (field, -1, NULL)) {
1140         g_free (utf16);
1141         break;
1142       }
1143 
1144       GST_DEBUG ("Trying interpreting data as UTF-16-%s now as fallback",
1145           (data_endianness == G_LITTLE_ENDIAN) ? "BE" : "LE");
1146 
1147       for (i = 0; i < len; ++i)
1148         utf16[i] = GUINT16_SWAP_LE_BE (utf16[i]);
1149 
1150       g_free (field);
1151       n_read = size = 0;
1152 
1153       /* try again */
1154       field = g_utf16_to_utf8 (utf16, len, &n_read, &size, NULL);
1155       g_free (utf16);
1156 
1157       if (field != NULL && n_read > 0 && g_utf8_validate (field, -1, NULL))
1158         break;
1159 
1160       GST_DEBUG ("Could not convert UTF-16 string to UTF-8");
1161       g_free (field);
1162       field = NULL;
1163       break;
1164     }
1165     case ID3V2_ENCODING_ISO8859:
1166       if (g_utf8_validate (data, data_size, NULL))
1167         field = g_strndup (data, data_size);
1168       else
1169         /* field = g_convert (data, data_size, "UTF-8", "ISO-8859-1",
1170            NULL, NULL, NULL); */
1171         field = string_utf8_dup (data, data_size);
1172       break;
1173     default:
1174       field = g_strndup (data, data_size);
1175       break;
1176   }
1177 
1178   if (field) {
1179     if (g_utf8_validate (field, -1, NULL)) {
1180       g_array_append_val (fields, field);
1181       return;
1182     }
1183 
1184     GST_DEBUG ("%s was bad UTF-8 after conversion from encoding %d. Ignoring",
1185         field, encoding);
1186     g_free (field);
1187   }
1188 }
1189 
1190 static void
parse_split_strings(guint8 encoding,gchar * data,gint data_size,GArray ** out_fields)1191 parse_split_strings (guint8 encoding, gchar * data, gint data_size,
1192     GArray ** out_fields)
1193 {
1194   GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
1195   gint text_pos;
1196   gint prev = 0;
1197 
1198   g_return_if_fail (out_fields != NULL);
1199 
1200   switch (encoding) {
1201     case ID3V2_ENCODING_ISO8859:
1202       for (text_pos = 0; text_pos < data_size; text_pos++) {
1203         if (data[text_pos] == 0) {
1204           parse_insert_string_field (encoding, data + prev,
1205               text_pos - prev, fields);
1206           prev = text_pos + 1;
1207         }
1208       }
1209       if (data_size - prev > 0 && data[prev] != 0x00) {
1210         parse_insert_string_field (encoding, data + prev,
1211             data_size - prev, fields);
1212       }
1213 
1214       break;
1215     case ID3V2_ENCODING_UTF8:
1216       for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
1217         if (data[text_pos] == '\0') {
1218           parse_insert_string_field (encoding, data + prev,
1219               text_pos - prev, fields);
1220           prev = text_pos + 1;
1221         }
1222       }
1223       if (data_size - prev > 0 && data[prev] != 0x00) {
1224         parse_insert_string_field (encoding, data + prev,
1225             data_size - prev, fields);
1226       }
1227       break;
1228     case ID3V2_ENCODING_UTF16:
1229     case ID3V2_ENCODING_UTF16BE:
1230     {
1231       /* Find '\0\0' terminator */
1232       for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
1233         if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') {
1234           /* found a delimiter */
1235           parse_insert_string_field (encoding, data + prev,
1236               text_pos - prev, fields);
1237           prev = text_pos + 2;
1238         }
1239       }
1240       if (data_size - prev > 1 &&
1241           (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
1242         /* There were 2 or more non-null chars left, convert those too */
1243         parse_insert_string_field (encoding, data + prev,
1244             data_size - prev, fields);
1245       }
1246       break;
1247     }
1248   }
1249   if (fields->len > 0)
1250     *out_fields = fields;
1251   else
1252     g_array_free (fields, TRUE);
1253 }
1254 
1255 static void
free_tag_strings(GArray * fields)1256 free_tag_strings (GArray * fields)
1257 {
1258   if (fields) {
1259     gint i;
1260     gchar *c;
1261 
1262     for (i = 0; i < fields->len; i++) {
1263       c = g_array_index (fields, gchar *, i);
1264       g_free (c);
1265     }
1266     g_array_free (fields, TRUE);
1267   }
1268 }
1269