• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
30# The file structure is top-down except that the test program and its
31# subroutine come last.
32
33__all__ = ['what', 'whathdr']
34
35from collections import namedtuple
36
# Record type for a decoded sound header; the field order matches the
# tuples produced by the per-format test_* routines.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'],
)

# Per-field help text, shown by help() and other introspection tools.
SndHeaders.filetype.__doc__ = (
    "The value for type indicates the data type\n"
    "and will be one of the strings 'aifc', 'aiff', 'au','hcom',\n"
    "'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'."
)
SndHeaders.framerate.__doc__ = (
    "The sampling_rate will be either the actual\n"
    "value or 0 if unknown or difficult to decode."
)
SndHeaders.nchannels.__doc__ = (
    "The number of channels or 0 if it cannot be\n"
    "determined or if the value is difficult to decode."
)
SndHeaders.nframes.__doc__ = (
    "The value for frames will be either the number\n"
    "of frames or -1."
)
SndHeaders.sampwidth.__doc__ = (
    "Either the sample size in bits or\n"
    "'A' for A-LAW or 'U' for u-LAW."
)
51
def what(filename):
    """Guess the type of a sound file.

    Thin wrapper that forwards to whathdr() and returns its result
    unchanged.
    """
    return whathdr(filename)
56
57
def whathdr(filename):
    """Recognize sound headers.

    Opens filename in binary mode, reads its first 512 bytes and offers
    them (together with the open file) to each routine in ``tests`` in
    registration order.  The first truthy result is wrapped in a
    SndHeaders tuple and returned; None is returned when no routine
    recognizes the header.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for probe in tests:
            fields = probe(header, stream)
            if not fields:
                continue
            return SndHeaders(*fields)
    return None
67
68
69#-----------------------------------#
70# Subroutines per sound header type #
71#-----------------------------------#
72
73tests = []
74
75def test_aifc(h, f):
76    import aifc
77    if not h.startswith(b'FORM'):
78        return None
79    if h[8:12] == b'AIFC':
80        fmt = 'aifc'
81    elif h[8:12] == b'AIFF':
82        fmt = 'aiff'
83    else:
84        return None
85    f.seek(0)
86    try:
87        a = aifc.open(f, 'r')
88    except (EOFError, aifc.Error):
89        return None
90    return (fmt, a.getframerate(), a.getnchannels(),
91            a.getnframes(), 8 * a.getsampwidth())
92
93tests.append(test_aifc)
94
95
96def test_au(h, f):
97    if h.startswith(b'.snd'):
98        func = get_long_be
99    elif h[:4] in (b'\0ds.', b'dns.'):
100        func = get_long_le
101    else:
102        return None
103    filetype = 'au'
104    hdr_size = func(h[4:8])
105    data_size = func(h[8:12])
106    encoding = func(h[12:16])
107    rate = func(h[16:20])
108    nchannels = func(h[20:24])
109    sample_size = 1 # default
110    if encoding == 1:
111        sample_bits = 'U'
112    elif encoding == 2:
113        sample_bits = 8
114    elif encoding == 3:
115        sample_bits = 16
116        sample_size = 2
117    else:
118        sample_bits = '?'
119    frame_size = sample_size * nchannels
120    if frame_size:
121        nframe = data_size / frame_size
122    else:
123        nframe = -1
124    return filetype, rate, nchannels, nframe, sample_bits
125
126tests.append(test_au)
127
128
129def test_hcom(h, f):
130    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
131        return None
132    divisor = get_long_be(h[144:148])
133    if divisor:
134        rate = 22050 / divisor
135    else:
136        rate = 0
137    return 'hcom', rate, 1, -1, 8
138
139tests.append(test_hcom)
140
141
142def test_voc(h, f):
143    if not h.startswith(b'Creative Voice File\032'):
144        return None
145    sbseek = get_short_le(h[20:22])
146    rate = 0
147    if 0 <= sbseek < 500 and h[sbseek] == 1:
148        ratecode = 256 - h[sbseek+4]
149        if ratecode:
150            rate = int(1000000.0 / ratecode)
151    return 'voc', rate, 1, -1, 8
152
153tests.append(test_voc)
154
155
156def test_wav(h, f):
157    import wave
158    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
159    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
160        return None
161    f.seek(0)
162    try:
163        w = wave.open(f, 'r')
164    except (EOFError, wave.Error):
165        return None
166    return ('wav', w.getframerate(), w.getnchannels(),
167                   w.getnframes(), 8*w.getsampwidth())
168
169tests.append(test_wav)
170
171
172def test_8svx(h, f):
173    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
174        return None
175    # Should decode it to get #channels -- assume always 1
176    return '8svx', 0, 1, 0, 8
177
178tests.append(test_8svx)
179
180
181def test_sndt(h, f):
182    if h.startswith(b'SOUND'):
183        nsamples = get_long_le(h[8:12])
184        rate = get_short_le(h[20:22])
185        return 'sndt', rate, 1, nsamples, 8
186
187tests.append(test_sndt)
188
189
190def test_sndr(h, f):
191    if h.startswith(b'\0\0'):
192        rate = get_short_le(h[2:4])
193        if 4000 <= rate <= 25000:
194            return 'sndr', rate, 1, -1, 8
195
196tests.append(test_sndr)
197
198
199#-------------------------------------------#
200# Subroutines to extract numbers from bytes #
201#-------------------------------------------#
202
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant item first."""
    high = b[0] << 24
    upper = b[1] << 16
    lower = b[2] << 8
    return high | upper | lower | b[3]
205
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant item first."""
    high = b[3] << 24
    upper = b[2] << 16
    lower = b[1] << 8
    return high | upper | lower | b[0]
208
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, most significant item first."""
    high = b[0] << 8
    return high | b[1]
211
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, least significant item first."""
    high = b[1] << 8
    return high | b[0]
214
215
216#--------------------#
217# Small test program #
218#--------------------#
219
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' option is removed from sys.argv and enables recursion
    into nested directories.  The remaining arguments (or '.' when there
    are none) are passed to testall().  On KeyboardInterrupt a notice is
    written to stderr and the process exits with status 1.
    """
    import sys
    recursive = 0
    if sys.argv[1:2] == ['-r']:
        del sys.argv[1:2]
        recursive = 1
    try:
        targets = sys.argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
234
def testall(list, recursive, toplevel):
    """Print the what() result for every path in list.

    Directories are expanded one level with a '*' glob and processed by a
    nested call when either recursive or toplevel is true; otherwise a
    hint to use -r is printed.  For ordinary paths the what() result is
    printed, or '*** not found ***' when what() raises OSError.
    """
    import glob
    import os
    import sys
    for filename in list:
        if not os.path.isdir(filename):
            print(filename + ':', end=' ')
            # Flush so the name is visible before the file is examined.
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')
            continue
        print(filename + '/:', end=' ')
        if not (recursive or toplevel):
            print('*** directory (use -r) ***')
            continue
        print('recursing down:')
        # Escape the directory name so glob metacharacters in it are literal.
        pattern = os.path.join(glob.escape(filename), '*')
        testall(glob.glob(pattern), recursive, 0)
255
256if __name__ == '__main__':
257    test()
258