"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  The
default argument is "." (testing all files in the current directory).
The option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

__all__ = ['what', 'whathdr']

from collections import namedtuple

SndHeaders = namedtuple('SndHeaders',
                        'filetype framerate nchannels nframes sampwidth')

SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type
and will be one of the strings 'aifc', 'aiff', 'au','hcom',
'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""")
SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual
value or 0 if unknown or difficult to decode.""")
SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be
determined or if the value is difficult to decode.""")
SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number
of frames or -1.""")
SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or
'A' for A-LAW or 'U' for u-LAW.""")


def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(); returns whatever whathdr() returns.
    """
    res = whathdr(filename)
    return res


def whathdr(filename):
    """Recognize sound headers.

    Read the first 512 bytes of *filename* and offer them, together with
    the open file object, to every function registered in ``tests`` in
    registration order.  The first truthy result is wrapped in a
    SndHeaders tuple and returned; None is returned if no test matches.
    OSError from open() propagates to the caller.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return SndHeaders(*res)
        return None


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Each test takes (h, f): the header bytes already read and the open
# binary file.  It returns a 5-tuple in SndHeaders field order, or None.
# A header that is too short to hold the fields a test needs is treated
# as "not recognized" instead of raising IndexError.
tests = []

# Value of the AU data-size field meaning "size unknown".
_AU_UNKNOWN_SIZE = 0xFFFFFFFF


def test_aifc(h, f):
    """Recognize AIFF and AIFF-C files ('FORM' ... 'AIFF'/'AIFC')."""
    if not h.startswith(b'FORM'):
        return None
    if h[8:12] == b'AIFC':
        fmt = 'aifc'
    elif h[8:12] == b'AIFF':
        fmt = 'aiff'
    else:
        return None
    # Imported only after the magic matched, so that probing any other
    # kind of file neither pays for nor depends on the aifc module.
    import aifc
    f.seek(0)
    try:
        a = aifc.open(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())

tests.append(test_aifc)


def test_au(h, f):
    """Recognize AU files ('.snd' big-endian, '\\0ds.'/'dns.' little-endian).

    The frame count is derived from the data-size field; it is -1 when
    that field is the "unknown" marker or when the frame size is zero.
    """
    if h.startswith(b'.snd'):
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    if len(h) < 24:
        # Truncated header: the fixed fields below are not all present.
        return None
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    sample_size = 1  # bytes per sample; default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    if frame_size and data_size != _AU_UNKNOWN_SIZE:
        # Floor division: a frame count is an integer, never a float.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return 'au', rate, nchannels, nframe, sample_bits

tests.append(test_au)


def test_hcom(h, f):
    """Recognize HCOM files ('FSSD' at offset 65, 'HCOM' at offset 128)."""
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    if len(h) < 148:
        # Truncated before the rate divisor field.
        return None
    divisor = get_long_be(h[144:148])
    if divisor:
        # Integer sample rate; a zero divisor means the rate is unknown.
        rate = 22050 // divisor
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8

tests.append(test_hcom)


def test_voc(h, f):
    """Recognize Creative Voice files.

    The rate is decoded only when the block the header points at lies
    inside the bytes that were read and has type 1; otherwise it is 0.
    """
    if not h.startswith(b'Creative Voice File\032'):
        return None
    if len(h) < 22:
        # Truncated before the data-block offset field.
        return None
    sbseek = get_short_le(h[20:22])
    rate = 0
    # sbseek + 4 must be a valid index as well, not just sbseek itself.
    if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
        ratecode = 256 - h[sbseek+4]
        if ratecode:
            rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8

tests.append(test_voc)


def test_wav(h, f):
    """Recognize RIFF/WAVE files and read their parameters via wave."""
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    # Imported only after the magic matched.
    import wave
    f.seek(0)
    try:
        w = wave.open(f, 'r')
    except (EOFError, wave.Error):
        return None
    return ('wav', w.getframerate(), w.getnchannels(),
            w.getnframes(), 8*w.getsampwidth())

tests.append(test_wav)


def test_8svx(h, f):
    """Recognize IFF 8SVX files ('FORM' ... '8SVX')."""
    if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8

tests.append(test_8svx)


def test_sndt(h, f):
    """Recognize files starting with 'SOUND' (reported as 'sndt')."""
    if h.startswith(b'SOUND') and len(h) >= 22:
        nsamples = get_long_le(h[8:12])
        rate = get_short_le(h[20:22])
        return 'sndt', rate, 1, nsamples, 8
    return None

tests.append(test_sndt)


def test_sndr(h, f):
    """Recognize 'sndr' files: two NUL bytes then a plausible 16-bit rate."""
    if h.startswith(b'\0\0') and len(h) >= 4:
        rate = get_short_le(h[2:4])
        # The magic is weak, so only accept a rate in a sensible range.
        if 4000 <= rate <= 25000:
            return 'sndr', rate, 1, -1, 8
    return None

tests.append(test_sndr)


#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#

def get_long_be(b):
    """Decode the first four bytes of *b* as a big-endian unsigned int."""
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]

def get_long_le(b):
    """Decode the first four bytes of *b* as a little-endian unsigned int."""
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]

def get_short_be(b):
    """Decode the first two bytes of *b* as a big-endian unsigned int."""
    return (b[0] << 8) | b[1]

def get_short_le(b):
    """Decode the first two bytes of *b* as a little-endian unsigned int."""
    return (b[1] << 8) | b[0]


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: report the type of every file argument.

    A leading '-r' enables recursion into sub-directories; with no file
    arguments the current directory is examined.
    """
    import sys
    recursive = 0
    if sys.argv[1:] and sys.argv[1] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        if sys.argv[1:]:
            testall(sys.argv[1:], recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)

def testall(list, recursive, toplevel):
    """Print what() for each name in *list*.

    Directories are expanded when *recursive* or *toplevel* is true;
    otherwise they are only reported.  Files that cannot be opened are
    reported instead of aborting the run.
    """
    import sys
    import os
    import glob
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                names = glob.glob(os.path.join(glob.escape(filename), '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')

if __name__ == '__main__':
    test()