"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
  (or '?' for an AU file whose encoding is not recognized)

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

__all__ = ["what", "whathdr"]


def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(); returns exactly what it returns.
    """
    return whathdr(filename)


def whathdr(filename):
    """Recognize a sound file from its header.

    Reads the first 512 bytes of *filename* and offers them, together
    with the open file object, to each function in ``tests`` in order.
    Returns the first true result (a 5-tuple: type, sampling rate,
    channels, frames, bits per sample), or None if no test matches.

    IOError from open() propagates to the caller.
    """
    # The context manager guarantees the handle is released on every
    # path; the recognizers only need the file while they are running.
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    return None


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Each recognizer takes (h, f): the first bytes of the file and the open
# binary file object.  It returns a result tuple, or None if the header
# is not of its type.  Magic numbers are bytes literals so comparisons
# behave the same on Python 2 and Python 3.

tests = []


def test_aifc(h, f):
    """Recognize AIFF / AIFF-C files (IFF 'FORM' container)."""
    if h[:4] != b'FORM':
        return None
    if h[8:12] == b'AIFC':
        fmt = 'aifc'
    elif h[8:12] == b'AIFF':
        fmt = 'aiff'
    else:
        return None
    # Imported only once the magic matches, so files of other types do
    # not depend on the aifc module being importable.
    import aifc
    f.seek(0)
    try:
        # aifc.open() accepts an open file object; the openfp alias is
        # not available in newer Python versions.
        a = aifc.open(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())

tests.append(test_aifc)


def test_au(h, f):
    """Recognize Sun/NeXT .au files, in either byte order."""
    magic = h[:4]
    if magic == b'.snd':
        decode = get_long_be
    elif magic in (b'\0ds.', b'dns.'):
        decode = get_long_le
    else:
        return None
    if len(h) < 24:
        # Truncated header: the fixed fields below are not all present.
        return None
    data_size = decode(h[8:12])
    encoding = decode(h[12:16])
    rate = decode(h[16:20])
    nchannels = decode(h[20:24])
    sample_size = 1  # default: one byte per sample
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    # A zero channel count would divide by zero, and 0xFFFFFFFF is the
    # AU marker for "data size unknown"; report -1 frames in both cases.
    if frame_size and data_size != 0xFFFFFFFF:
        nframes = data_size // frame_size
    else:
        nframes = -1
    return 'au', rate, nchannels, nframes, sample_bits

tests.append(test_au)


def test_hcom(h, f):
    """Recognize HCOM files ('FSSD' at offset 65, 'HCOM' at offset 128)."""
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    if len(h) < 148:
        # Truncated header: the divisor field is missing.
        return None
    divisor = get_long_be(h[128+16:128+20])
    # A zero divisor means the rate cannot be computed; report 0.
    if divisor:
        rate = 22050 // divisor
    else:
        rate = 0
    return 'hcom', rate, 1, -1, 8

tests.append(test_hcom)


def test_voc(h, f):
    """Recognize Creative Voice (.voc) files."""
    if h[:20] != b'Creative Voice File\032':
        return None
    rate = 0  # reported when the rate byte cannot be located
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # Slices instead of indexing: they yield bytes on both Python 2
        # and 3 and come back empty (rather than raising) when the
        # header read from the file is too short.
        if 0 <= sbseek < 500 and h[sbseek:sbseek + 1] == b'\1':
            code = h[sbseek + 4:sbseek + 5]
            if code:
                rate = int(1000000.0 / (256 - ord(code)))
    return 'voc', rate, 1, -1, 8

tests.append(test_voc)


def test_wav(h, f):
    """Recognize RIFF/WAVE files whose first chunk is 'fmt '."""
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if h[:4] != b'RIFF' or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    if len(h) < 36:
        # Truncated header: the fields decoded below are incomplete.
        return None
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits

tests.append(test_wav)


def test_8svx(h, f):
    """Recognize IFF 8SVX files (header fields are not decoded)."""
    if h[:4] != b'FORM' or h[8:12] != b'8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8

tests.append(test_8svx)


def test_sndt(h, f):
    """Recognize files that start with the magic 'SOUND'."""
    if h[:5] != b'SOUND' or len(h) < 22:
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8

tests.append(test_sndt)


def test_sndr(h, f):
    """Recognize files with two NUL bytes followed by a plausible rate."""
    if h[:2] != b'\0\0' or len(h) < 4:
        return None
    rate = get_short_le(h[2:4])
    # The magic is weak, so only accept rates in a plausible range.
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None

tests.append(test_sndr)


#---------------------------------------------#
# Subroutines to extract numbers from strings #
#---------------------------------------------#

# bytearray() yields integer byte values for a byte string on both
# Python 2 and Python 3.  Input that is too short raises IndexError.

def get_long_be(s):
    """Decode the first four bytes of s as a big-endian unsigned int."""
    b = bytearray(s)
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]


def get_long_le(s):
    """Decode the first four bytes of s as a little-endian unsigned int."""
    b = bytearray(s)
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]


def get_short_be(s):
    """Decode the first two bytes of s as a big-endian unsigned int."""
    b = bytearray(s)
    return (b[0] << 8) | b[1]


def get_short_le(s):
    """Decode the first two bytes of s as a little-endian unsigned int."""
    b = bytearray(s)
    return (b[1] << 8) | b[0]


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run what() on the files named in sys.argv.

    A leading '-r' option is removed from sys.argv and enables recursion
    into nested directories.  With no file arguments, '.' is used.
    Ctrl-C prints a notice to stderr and exits with status 1.
    """
    import sys
    recursive = 0
    if sys.argv[1:] and sys.argv[1] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        if sys.argv[1:]:
            testall(sys.argv[1:], recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)


def testall(list, recursive, toplevel):
    """Print the what() result for every name in *list*.

    Directories are expanded with glob when *recursive* or *toplevel* is
    true; otherwise they are only reported.  Files that cannot be opened
    are reported as '*** not found ***'.

    (The parameter name ``list`` shadows the builtin; it is kept so
    existing keyword callers keep working.)
    """
    import glob
    import os
    import sys
    out = sys.stdout
    for filename in list:
        if os.path.isdir(filename):
            if recursive or toplevel:
                out.write(filename + '/: recursing down:\n')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                out.write(filename + '/: *** directory (use -r) ***\n')
        else:
            out.write(filename + ': ')
            # Flush so the name is visible before the file is examined.
            out.flush()
            try:
                result = what(filename)
            except IOError:
                out.write('*** not found ***\n')
            else:
                out.write('%s\n' % (result,))


if __name__ == '__main__':
    test()