Merge lp:~eh/coverlovin/mutagen into lp:coverlovin
- mutagen
- Merge into coverlovin
Proposed by
Eetu Huisman
Status: | Merged |
---|---|
Merged at revision: | 7 |
Proposed branch: | lp:~eh/coverlovin/mutagen |
Merge into: | lp:coverlovin |
Diff against target: |
670 lines (+10/-632) 2 files modified
coverlovin.py (+10/-5) id3reader.py (+0/-627) |
To merge this branch: | bzr merge lp:~eh/coverlovin/mutagen |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
James Stewart | Approve | ||
Review via email: mp+102965@code.launchpad.net |
Commit message
Description of the change
Added support for audio file types other than MP3 by replacing the bundled id3reader module with a dependency on Mutagen.
To post a comment you must log in.
Revision history for this message
James Stewart (amorphic) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'coverlovin.py' | |||
2 | --- coverlovin.py 2012-02-06 14:08:17 +0000 | |||
3 | +++ coverlovin.py 2012-04-21 08:59:19 +0000 | |||
4 | @@ -10,7 +10,7 @@ | |||
5 | 10 | import os, sys | 10 | import os, sys |
6 | 11 | import urllib, urllib2 | 11 | import urllib, urllib2 |
7 | 12 | import simplejson | 12 | import simplejson |
9 | 13 | import id3reader | 13 | import mutagen |
10 | 14 | import logging | 14 | import logging |
11 | 15 | from optparse import OptionParser | 15 | from optparse import OptionParser |
12 | 16 | 16 | ||
13 | @@ -147,16 +147,21 @@ | |||
14 | 147 | fileFullPath = os.path.join(thisDir, file) | 147 | fileFullPath = os.path.join(thisDir, file) |
15 | 148 | if os.path.getsize(fileFullPath) > 128: | 148 | if os.path.getsize(fileFullPath) > 128: |
16 | 149 | 149 | ||
18 | 150 | # check file for id3 tag info | 150 | # check file for tag info |
19 | 151 | try: | 151 | try: |
21 | 152 | id3r = id3reader.Reader(fileFullPath) | 152 | fileInfo = mutagen.File(fileFullPath, easy=True) |
22 | 153 | except: | 153 | except: |
23 | 154 | log.info("Skipping " + file + " due to unicode error") | 154 | log.info("Skipping " + file + " due to unicode error") |
24 | 155 | continue | 155 | continue |
25 | 156 | 156 | ||
28 | 157 | artist = id3r.getValue('performer') | 157 | try: |
29 | 158 | album = id3r.getValue('album') | 158 | artist = fileInfo["artist"][0] |
30 | 159 | album = fileInfo["album"][0] | ||
31 | 160 | except: | ||
32 | 161 | log.debug("Artist or album of " + file + " not found, continuing") | ||
33 | 162 | continue | ||
34 | 159 | 163 | ||
35 | 164 | log.info("artist: " + artist + ", album: " + album) | ||
36 | 160 | # sanitise None values | 165 | # sanitise None values |
37 | 161 | if artist == None: artist = '' | 166 | if artist == None: artist = '' |
38 | 162 | if album == None: album = '' | 167 | if album == None: album = '' |
39 | 163 | 168 | ||
40 | === removed file 'id3reader.py' | |||
41 | --- id3reader.py 2011-02-06 03:46:54 +0000 | |||
42 | +++ id3reader.py 1970-01-01 00:00:00 +0000 | |||
43 | @@ -1,627 +0,0 @@ | |||
44 | 1 | """ Read ID3 tags from a file. | ||
45 | 2 | Ned Batchelder, http://nedbatchelder.com/code/modules/id3reader.html | ||
46 | 3 | """ | ||
47 | 4 | |||
48 | 5 | __version__ = '1.53.20070415' # History at the end of the file. | ||
49 | 6 | |||
50 | 7 | # ID3 specs: http://www.id3.org/develop.html | ||
51 | 8 | |||
52 | 9 | import struct, sys, zlib | ||
53 | 10 | |||
54 | 11 | # These are the text encodings, indexed by the first byte of a text value. | ||
55 | 12 | _encodings = ['iso8859-1', 'utf-16', 'utf-16be', 'utf-8'] | ||
56 | 13 | |||
57 | 14 | # Simple pseudo-id's, mapped to their various representations. | ||
58 | 15 | # Use these ids with getValue, and you don't need to know what | ||
59 | 16 | # version of ID3 the file contains. | ||
60 | 17 | _simpleDataMapping = { | ||
61 | 18 | 'album': ('TALB', 'TAL', 'v1album', 'TOAL'), | ||
62 | 19 | 'performer': ('TPE1', 'TP1', 'v1performer', 'TOPE'), | ||
63 | 20 | 'title': ('TIT2', 'TT2', 'v1title'), | ||
64 | 21 | 'track': ('TRCK', 'TRK', 'v1track'), | ||
65 | 22 | 'year': ('TYER', 'TYE', 'v1year'), | ||
66 | 23 | 'genre': ('TCON', 'TCO', 'v1genre'), | ||
67 | 24 | 'comment': ('COMM', 'COM', 'v1comment'), | ||
68 | 25 | } | ||
69 | 26 | |||
70 | 27 | # Provide booleans for older Pythons. | ||
71 | 28 | try: | ||
72 | 29 | True, False | ||
73 | 30 | except NameError: | ||
74 | 31 | True, False = 1==1, 1==0 | ||
75 | 32 | |||
76 | 33 | # Tracing | ||
77 | 34 | _t = False | ||
78 | 35 | def _trace(msg): | ||
79 | 36 | print msg | ||
80 | 37 | |||
81 | 38 | # Coverage | ||
82 | 39 | _c = False | ||
83 | 40 | _features = {} | ||
84 | 41 | def _coverage(feat): | ||
85 | 42 | #if _t: _trace('feature '+feat) | ||
86 | 43 | _features[feat] = _features.setdefault(feat, 0)+1 | ||
87 | 44 | |||
88 | 45 | def _safestr(s): | ||
89 | 46 | """ Get a good string for printing, that won't throw exceptions, | ||
90 | 47 | no matter what's in it. | ||
91 | 48 | """ | ||
92 | 49 | try: | ||
93 | 50 | return unicode(s).encode(sys.getdefaultencoding()) | ||
94 | 51 | except UnicodeError: | ||
95 | 52 | return '?: '+repr(s) | ||
96 | 53 | |||
97 | 54 | # Can I just say that I think the whole concept of genres is bogus, | ||
98 | 55 | # since they are so subjective? And the idea of letting someone else pick | ||
99 | 56 | # one of these things and then have it affect the categorization of my music | ||
100 | 57 | # is extra bogus. And the list itself is absurd. Polsk Punk? | ||
101 | 58 | _genres = [ | ||
102 | 59 | # 0-19 | ||
103 | 60 | 'Blues', 'Classic Rock', 'Country', 'Dance', 'Disco', 'Funk', 'Grunge', 'Hip - Hop', 'Jazz', 'Metal', | ||
104 | 61 | 'New Age', 'Oldies', 'Other', 'Pop', 'R&B', 'Rap', 'Reggae', 'Rock', 'Techno', 'Industrial', | ||
105 | 62 | # 20-39 | ||
106 | 63 | 'Alternative', 'Ska', 'Death Metal', 'Pranks', 'Soundtrack', 'Euro - Techno', 'Ambient', 'Trip - Hop', 'Vocal', 'Jazz + Funk', | ||
107 | 64 | 'Fusion', 'Trance', 'Classical', 'Instrumental', 'Acid', 'House', 'Game', 'Sound Clip', 'Gospel', 'Noise', | ||
108 | 65 | # 40-59 | ||
109 | 66 | 'Alt Rock', 'Bass', 'Soul', 'Punk', 'Space', 'Meditative', 'Instrumental Pop', 'Instrumental Rock', 'Ethnic', 'Gothic', | ||
110 | 67 | 'Darkwave', 'Techno - Industrial', 'Electronic', 'Pop - Folk', 'Eurodance', 'Dream', 'Southern Rock', 'Comedy', 'Cult', 'Gangsta Rap', | ||
111 | 68 | # 60-79 | ||
112 | 69 | 'Top 40', 'Christian Rap', 'Pop / Funk', 'Jungle', 'Native American', 'Cabaret', 'New Wave', 'Psychedelic', 'Rave', 'Showtunes', | ||
113 | 70 | 'Trailer', 'Lo - Fi', 'Tribal', 'Acid Punk', 'Acid Jazz', 'Polka', 'Retro', 'Musical', 'Rock & Roll', 'Hard Rock', | ||
114 | 71 | # 80-99 | ||
115 | 72 | 'Folk', 'Folk / Rock', 'National Folk', 'Swing', 'Fast - Fusion', 'Bebob', 'Latin', 'Revival', 'Celtic', 'Bluegrass', | ||
116 | 73 | 'Avantgarde', 'Gothic Rock', 'Progressive Rock', 'Psychedelic Rock', 'Symphonic Rock', 'Slow Rock', 'Big Band', 'Chorus', 'Easy Listening', 'Acoustic', | ||
117 | 74 | # 100-119 | ||
118 | 75 | 'Humour', 'Speech', 'Chanson', 'Opera', 'Chamber Music', 'Sonata', 'Symphony', 'Booty Bass', 'Primus', 'Porn Groove', | ||
119 | 76 | 'Satire', 'Slow Jam', 'Club', 'Tango', 'Samba', 'Folklore', 'Ballad', 'Power Ballad', 'Rhythmic Soul', 'Freestyle', | ||
120 | 77 | # 120-139 | ||
121 | 78 | 'Duet', 'Punk Rock', 'Drum Solo', 'A Cappella', 'Euro - House', 'Dance Hall', 'Goa', 'Drum & Bass', 'Club - House', 'Hardcore', | ||
122 | 79 | 'Terror', 'Indie', 'BritPop', 'Negerpunk', 'Polsk Punk', 'Beat', 'Christian Gangsta Rap', 'Heavy Metal', 'Black Metal', 'Crossover', | ||
123 | 80 | # 140-147 | ||
124 | 81 | 'Contemporary Christian', 'Christian Rock', 'Merengue', 'Salsa', 'Thrash Metal', 'Anime', 'JPop', 'Synthpop' | ||
125 | 82 | ] | ||
126 | 83 | |||
127 | 84 | class Id3Error(Exception): | ||
128 | 85 | """ An exception caused by id3reader properly handling a bad ID3 tag. | ||
129 | 86 | """ | ||
130 | 87 | pass | ||
131 | 88 | |||
132 | 89 | class _Header: | ||
133 | 90 | """ Represent the ID3 header in a tag. | ||
134 | 91 | """ | ||
135 | 92 | def __init__(self): | ||
136 | 93 | self.majorVersion = 0 | ||
137 | 94 | self.revision = 0 | ||
138 | 95 | self.flags = 0 | ||
139 | 96 | self.size = 0 | ||
140 | 97 | self.bUnsynchronized = False | ||
141 | 98 | self.bExperimental = False | ||
142 | 99 | self.bFooter = False | ||
143 | 100 | |||
144 | 101 | def __str__(self): | ||
145 | 102 | return str(self.__dict__) | ||
146 | 103 | |||
147 | 104 | class _Frame: | ||
148 | 105 | """ Represent an ID3 frame in a tag. | ||
149 | 106 | """ | ||
150 | 107 | def __init__(self): | ||
151 | 108 | self.id = '' | ||
152 | 109 | self.size = 0 | ||
153 | 110 | self.flags = 0 | ||
154 | 111 | self.rawData = '' | ||
155 | 112 | self.bTagAlterPreserve = False | ||
156 | 113 | self.bFileAlterPreserve = False | ||
157 | 114 | self.bReadOnly = False | ||
158 | 115 | self.bCompressed = False | ||
159 | 116 | self.bEncrypted = False | ||
160 | 117 | self.bInGroup = False | ||
161 | 118 | |||
162 | 119 | def __str__(self): | ||
163 | 120 | return str(self.__dict__) | ||
164 | 121 | |||
165 | 122 | def __repr__(self): | ||
166 | 123 | return str(self.__dict__) | ||
167 | 124 | |||
168 | 125 | def _interpret(self): | ||
169 | 126 | """ Examine self.rawData and create a self.value from it. | ||
170 | 127 | """ | ||
171 | 128 | if len(self.rawData) == 0: | ||
172 | 129 | # This is counter to the spec, but seems harmless enough. | ||
173 | 130 | #if _c: _coverage('zero data') | ||
174 | 131 | return | ||
175 | 132 | |||
176 | 133 | if self.bCompressed: | ||
177 | 134 | # Decompress the compressed data. | ||
178 | 135 | self.rawData = zlib.decompress(self.rawData) | ||
179 | 136 | |||
180 | 137 | if self.id[0] == 'T': | ||
181 | 138 | # Text fields start with T | ||
182 | 139 | encoding = ord(self.rawData[0]) | ||
183 | 140 | if 0 <= encoding < len(_encodings): | ||
184 | 141 | #if _c: _coverage('encoding%d' % encoding) | ||
185 | 142 | value = self.rawData[1:].decode(_encodings[encoding]) | ||
186 | 143 | else: | ||
187 | 144 | #if _c: _coverage('bad encoding') | ||
188 | 145 | value = self.rawData[1:] | ||
189 | 146 | # Don't let trailing zero bytes fool you. | ||
190 | 147 | if value: | ||
191 | 148 | value = value.strip('\0') | ||
192 | 149 | # The value can actually be a list. | ||
193 | 150 | if '\0' in value: | ||
194 | 151 | value = value.split('\0') | ||
195 | 152 | #if _c: _coverage('textlist') | ||
196 | 153 | self.value = value | ||
197 | 154 | elif self.id[0] == 'W': | ||
198 | 155 | # URL fields start with W | ||
199 | 156 | self.value = self.rawData.strip('\0') | ||
200 | 157 | if self.id == 'WXXX': | ||
201 | 158 | self.value = self.value.split('\0') | ||
202 | 159 | elif self.id == 'CDM': | ||
203 | 160 | # ID3v2.2.1 Compressed Data Metaframe | ||
204 | 161 | if self.rawData[0] == 'z': | ||
205 | 162 | self.rawData = zlib.decompress(self.rawData[5:]) | ||
206 | 163 | else: | ||
207 | 164 | #if _c: _coverage('badcdm!') | ||
208 | 165 | raise Id3Error, 'Unknown CDM compression: %02x' % self.rawData[0] | ||
209 | 166 | #@TODO: re-interpret the decompressed frame. | ||
210 | 167 | |||
211 | 168 | elif self.id in _simpleDataMapping['comment']: | ||
212 | 169 | # comment field | ||
213 | 170 | |||
214 | 171 | # In limited testing a typical comment looks like | ||
215 | 172 | # '\x00XXXID3v1 Comment\x00comment test' so in this | ||
216 | 173 | # case we need to find the second \x00 to know where | ||
217 | 174 | # where we start for a comment. In case we only find | ||
218 | 175 | # one \x00, lets just start at the beginning for the | ||
219 | 176 | # value | ||
220 | 177 | s = str(self.rawData) | ||
221 | 178 | |||
222 | 179 | pos = 0 | ||
223 | 180 | count = 0 | ||
224 | 181 | while pos < len(s) and count < 2: | ||
225 | 182 | if ord(s[pos]) == 0: | ||
226 | 183 | count = count + 1 | ||
227 | 184 | pos = pos + 1 | ||
228 | 185 | if count < 2: | ||
229 | 186 | pos = 1 | ||
230 | 187 | |||
231 | 188 | if pos > 0 and pos < len(s): | ||
232 | 189 | s = s[pos:] | ||
233 | 190 | if ord(s[-1]) == 0: | ||
234 | 191 | s = s[:-1] | ||
235 | 192 | |||
236 | 193 | self.value = s | ||
237 | 194 | |||
238 | 195 | class Reader: | ||
239 | 196 | """ An ID3 reader. | ||
240 | 197 | Create one on a file object, and then use getValue('TIT2') (for example) | ||
241 | 198 | to pull values. | ||
242 | 199 | """ | ||
243 | 200 | def __init__(self, file): | ||
244 | 201 | """ Create a reader from a file or filename. """ | ||
245 | 202 | self.file = file | ||
246 | 203 | self.header = None | ||
247 | 204 | self.frames = {} | ||
248 | 205 | self.allFrames = [] | ||
249 | 206 | self.bytesLeft = 0 | ||
250 | 207 | self.padbytes = '' | ||
251 | 208 | |||
252 | 209 | bCloseFile = False | ||
253 | 210 | # If self.file is a string of some sort, then open it to get a file. | ||
254 | 211 | if isinstance(self.file, (type(''), type(u''))): | ||
255 | 212 | self.file = open(self.file, 'rb') | ||
256 | 213 | bCloseFile = True | ||
257 | 214 | |||
258 | 215 | self._readId3() | ||
259 | 216 | |||
260 | 217 | if bCloseFile: | ||
261 | 218 | self.file.close() | ||
262 | 219 | |||
263 | 220 | def _readBytes(self, num, desc=''): | ||
264 | 221 | """ Read some bytes from the file. | ||
265 | 222 | This method implements the "unsynchronization" scheme, | ||
266 | 223 | where 0xFF bytes may have had 0x00 bytes stuffed after | ||
267 | 224 | them. These zero bytes have to be removed transparently. | ||
268 | 225 | """ | ||
269 | 226 | #if _t: _trace("ask %d (%s)" % (num,desc)) | ||
270 | 227 | if num > self.bytesLeft: | ||
271 | 228 | #if _c: _coverage('long!') | ||
272 | 229 | raise Id3Error, 'Long read (%s): (%d > %d)' % (desc, num, self.bytesLeft) | ||
273 | 230 | bytes = self.file.read(num) | ||
274 | 231 | self.bytesLeft -= num | ||
275 | 232 | |||
276 | 233 | if len(bytes) < num: | ||
277 | 234 | #if _t: _trace("short read with %d left, %d total" % (self.bytesLeft, self.header.size)) | ||
278 | 235 | #if _c: _coverage('short!') | ||
279 | 236 | raise Id3Error, 'Short read (%s): (%d < %d)' % (desc, len(bytes), num) | ||
280 | 237 | |||
281 | 238 | if self.header.bUnsynchronized: | ||
282 | 239 | nUnsync = 0 | ||
283 | 240 | i = 0 | ||
284 | 241 | while True: | ||
285 | 242 | i = bytes.find('\xFF\x00', i) | ||
286 | 243 | if i == -1: | ||
287 | 244 | break | ||
288 | 245 | #if _t: _trace("unsync at %d" % (i+1)) | ||
289 | 246 | #if _c: _coverage('unsyncbyte') | ||
290 | 247 | nUnsync += 1 | ||
291 | 248 | # This is a stuffed byte to remove | ||
292 | 249 | bytes = bytes[:i+1] + bytes[i+2:] | ||
293 | 250 | # Have to read one more byte from the file to adjust | ||
294 | 251 | bytes += self.file.read(1) | ||
295 | 252 | self.bytesLeft -= 1 | ||
296 | 253 | i += 1 | ||
297 | 254 | #if _t: _trace("unsync'ed %d" % (nUnsync)) | ||
298 | 255 | |||
299 | 256 | return bytes | ||
300 | 257 | |||
301 | 258 | def _unreadBytes(self, num): | ||
302 | 259 | self.file.seek(-num, 1) | ||
303 | 260 | self.bytesLeft += num | ||
304 | 261 | |||
305 | 262 | def _getSyncSafeInt(self, bytes): | ||
306 | 263 | assert len(bytes) == 4 | ||
307 | 264 | if type(bytes) == type(''): | ||
308 | 265 | bytes = [ ord(c) for c in bytes ] | ||
309 | 266 | return (bytes[0] << 21) + (bytes[1] << 14) + (bytes[2] << 7) + bytes[3] | ||
310 | 267 | |||
311 | 268 | def _getInteger(self, bytes): | ||
312 | 269 | i = 0; | ||
313 | 270 | if type(bytes) == type(''): | ||
314 | 271 | bytes = [ ord(c) for c in bytes ] | ||
315 | 272 | for b in bytes: | ||
316 | 273 | i = i*256+b | ||
317 | 274 | return i | ||
318 | 275 | |||
319 | 276 | def _addV1Frame(self, id, rawData): | ||
320 | 277 | if id == 'v1genre': | ||
321 | 278 | assert len(rawData) == 1 | ||
322 | 279 | nGenre = ord(rawData) | ||
323 | 280 | try: | ||
324 | 281 | value = _genres[nGenre] | ||
325 | 282 | except IndexError: | ||
326 | 283 | value = "(%d)" % nGenre | ||
327 | 284 | else: | ||
328 | 285 | value = rawData.strip(' \t\r\n').split('\0')[0] | ||
329 | 286 | if value: | ||
330 | 287 | frame = _Frame() | ||
331 | 288 | frame.id = id | ||
332 | 289 | frame.rawData = rawData | ||
333 | 290 | frame.value = value | ||
334 | 291 | self.frames[id] = frame | ||
335 | 292 | self.allFrames.append(frame) | ||
336 | 293 | |||
337 | 294 | def _pass(self): | ||
338 | 295 | """ Do nothing, for when we need to plug in a no-op function. | ||
339 | 296 | """ | ||
340 | 297 | pass | ||
341 | 298 | |||
342 | 299 | def _readId3(self): | ||
343 | 300 | header = self.file.read(10) | ||
344 | 301 | if len(header) < 10: | ||
345 | 302 | return | ||
346 | 303 | hstuff = struct.unpack('!3sBBBBBBB', header) | ||
347 | 304 | if hstuff[0] != "ID3": | ||
348 | 305 | # Doesn't look like an ID3v2 tag, | ||
349 | 306 | # Try reading an ID3v1 tag. | ||
350 | 307 | self._readId3v1() | ||
351 | 308 | return | ||
352 | 309 | |||
353 | 310 | self.header = _Header() | ||
354 | 311 | self.header.majorVersion = hstuff[1] | ||
355 | 312 | self.header.revision = hstuff[2] | ||
356 | 313 | self.header.flags = hstuff[3] | ||
357 | 314 | self.header.size = self._getSyncSafeInt(hstuff[4:8]) | ||
358 | 315 | |||
359 | 316 | self.bytesLeft = self.header.size | ||
360 | 317 | |||
361 | 318 | self._readExtHeader = self._pass | ||
362 | 319 | |||
363 | 320 | if self.header.majorVersion == 2: | ||
364 | 321 | #if _c: _coverage('id3v2.2.%d' % self.header.revision) | ||
365 | 322 | self._readFrame = self._readFrame_rev2 | ||
366 | 323 | elif self.header.majorVersion == 3: | ||
367 | 324 | #if _c: _coverage('id3v2.3.%d' % self.header.revision) | ||
368 | 325 | self._readFrame = self._readFrame_rev3 | ||
369 | 326 | elif self.header.majorVersion == 4: | ||
370 | 327 | #if _c: _coverage('id3v2.4.%d' % self.header.revision) | ||
371 | 328 | self._readFrame = self._readFrame_rev4 | ||
372 | 329 | else: | ||
373 | 330 | #if _c: _coverage('badmajor!') | ||
374 | 331 | raise Id3Error, "Unsupported major version: %d" % self.header.majorVersion | ||
375 | 332 | |||
376 | 333 | # Interpret the flags | ||
377 | 334 | self._interpretFlags() | ||
378 | 335 | |||
379 | 336 | # Read any extended header | ||
380 | 337 | self._readExtHeader() | ||
381 | 338 | |||
382 | 339 | # Read the frames | ||
383 | 340 | while self.bytesLeft > 0: | ||
384 | 341 | frame = self._readFrame() | ||
385 | 342 | if frame: | ||
386 | 343 | frame._interpret() | ||
387 | 344 | self.frames[frame.id] = frame | ||
388 | 345 | self.allFrames.append(frame) | ||
389 | 346 | else: | ||
390 | 347 | #if _c: _coverage('padding') | ||
391 | 348 | break | ||
392 | 349 | |||
393 | 350 | def _interpretFlags(self): | ||
394 | 351 | """ Interpret ID3v2.x flags. | ||
395 | 352 | """ | ||
396 | 353 | if self.header.flags & 0x80: | ||
397 | 354 | self.header.bUnsynchronized = True | ||
398 | 355 | #if _c: _coverage('unsynctag') | ||
399 | 356 | |||
400 | 357 | if self.header.majorVersion == 2: | ||
401 | 358 | if self.header.flags & 0x40: | ||
402 | 359 | #if _c: _coverage('compressed') | ||
403 | 360 | # "Since no compression scheme has been decided yet, | ||
404 | 361 | # the ID3 decoder (for now) should just ignore the entire | ||
405 | 362 | # tag if the compression bit is set." | ||
406 | 363 | self.header.bCompressed = True | ||
407 | 364 | |||
408 | 365 | if self.header.majorVersion >= 3: | ||
409 | 366 | if self.header.flags & 0x40: | ||
410 | 367 | #if _c: _coverage('extheader') | ||
411 | 368 | if self.header.majorVersion == 3: | ||
412 | 369 | self._readExtHeader = self._readExtHeader_rev3 | ||
413 | 370 | else: | ||
414 | 371 | self._readExtHeader = self._readExtHeader_rev4 | ||
415 | 372 | if self.header.flags & 0x20: | ||
416 | 373 | #if _c: _coverage('experimental') | ||
417 | 374 | self.header.bExperimental = True | ||
418 | 375 | |||
419 | 376 | if self.header.majorVersion >= 4: | ||
420 | 377 | if self.header.flags & 0x10: | ||
421 | 378 | #if _c: _coverage('footer') | ||
422 | 379 | self.header.bFooter = True | ||
423 | 380 | |||
424 | 381 | def _readExtHeader_rev3(self): | ||
425 | 382 | """ Read the ID3v2.3 extended header. | ||
426 | 383 | """ | ||
427 | 384 | # We don't interpret this yet, just eat the bytes. | ||
428 | 385 | size = self._getInteger(self._readBytes(4, 'rev3ehlen')) | ||
429 | 386 | self._readBytes(size, 'rev3ehdata') | ||
430 | 387 | |||
431 | 388 | def _readExtHeader_rev4(self): | ||
432 | 389 | """ Read the ID3v2.4 extended header. | ||
433 | 390 | """ | ||
434 | 391 | # We don't interpret this yet, just eat the bytes. | ||
435 | 392 | size = self._getSyncSafeInt(self._readBytes(4, 'rev4ehlen')) | ||
436 | 393 | self._readBytes(size-4, 'rev4ehdata') | ||
437 | 394 | |||
438 | 395 | def _readId3v1(self): | ||
439 | 396 | """ Read the ID3v1 tag. | ||
440 | 397 | spec: http://www.id3.org/id3v1.html | ||
441 | 398 | """ | ||
442 | 399 | self.file.seek(-128, 2) | ||
443 | 400 | tag = self.file.read(128) | ||
444 | 401 | if len(tag) != 128: | ||
445 | 402 | return | ||
446 | 403 | if tag[0:3] != 'TAG': | ||
447 | 404 | return | ||
448 | 405 | self.header = _Header() | ||
449 | 406 | self.header.majorVersion = 1 | ||
450 | 407 | self.header.revision = 0 | ||
451 | 408 | |||
452 | 409 | self._addV1Frame('v1title', tag[3:33]) | ||
453 | 410 | self._addV1Frame('v1performer', tag[33:63]) | ||
454 | 411 | self._addV1Frame('v1album', tag[63:93]) | ||
455 | 412 | self._addV1Frame('v1year', tag[93:97]) | ||
456 | 413 | self._addV1Frame('v1comment', tag[97:127]) | ||
457 | 414 | self._addV1Frame('v1genre', tag[127]) | ||
458 | 415 | if tag[125] == '\0' and tag[126] != '\0': | ||
459 | 416 | #if _c: _coverage('id3v1.1') | ||
460 | 417 | self.header.revision = 1 | ||
461 | 418 | self._addV1Frame('v1track', str(ord(tag[126]))) | ||
462 | 419 | else: | ||
463 | 420 | #if _c: _coverage('id3v1.0') | ||
464 | 421 | pass | ||
465 | 422 | return | ||
466 | 423 | |||
467 | 424 | _validIdChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' | ||
468 | 425 | |||
469 | 426 | def _isValidId(self, id): | ||
470 | 427 | """ Determine if the id bytes make a valid ID3 id. | ||
471 | 428 | """ | ||
472 | 429 | for c in id: | ||
473 | 430 | if not c in self._validIdChars: | ||
474 | 431 | #if _c: _coverage('bad id') | ||
475 | 432 | return False | ||
476 | 433 | #if _c: _coverage('id '+id) | ||
477 | 434 | return True | ||
478 | 435 | |||
479 | 436 | def _readFrame_rev2(self): | ||
480 | 437 | """ Read a frame for ID3v2.2: three-byte ids and lengths. | ||
481 | 438 | spec: http://www.id3.org/id3v2-00.txt | ||
482 | 439 | """ | ||
483 | 440 | if self.bytesLeft < 6: | ||
484 | 441 | return None | ||
485 | 442 | id = self._readBytes(3, 'rev2id') | ||
486 | 443 | if len(id) < 3 or not self._isValidId(id): | ||
487 | 444 | self._unreadBytes(len(id)) | ||
488 | 445 | return None | ||
489 | 446 | hstuff = struct.unpack('!BBB', self._readBytes(3, 'rev2len')) | ||
490 | 447 | frame = _Frame() | ||
491 | 448 | frame.id = id | ||
492 | 449 | frame.size = self._getInteger(hstuff[0:3]) | ||
493 | 450 | frame.rawData = self._readBytes(frame.size, 'rev2data') | ||
494 | 451 | return frame | ||
495 | 452 | |||
496 | 453 | def _readFrame_rev3(self): | ||
497 | 454 | """ Read a frame for ID3v2.3: four-byte ids and lengths. | ||
498 | 455 | """ | ||
499 | 456 | if self.bytesLeft < 10: | ||
500 | 457 | return None | ||
501 | 458 | id = self._readBytes(4,'rev3id') | ||
502 | 459 | if len(id) < 4 or not self._isValidId(id): | ||
503 | 460 | self._unreadBytes(len(id)) | ||
504 | 461 | return None | ||
505 | 462 | hstuff = struct.unpack('!BBBBh', self._readBytes(6,'rev3head')) | ||
506 | 463 | frame = _Frame() | ||
507 | 464 | frame.id = id | ||
508 | 465 | frame.size = self._getInteger(hstuff[0:4]) | ||
509 | 466 | cbData = frame.size | ||
510 | 467 | frame.flags = hstuff[4] | ||
511 | 468 | #if _t: _trace('flags = %x' % frame.flags) | ||
512 | 469 | frame.bTagAlterPreserve = (frame.flags & 0x8000 != 0) | ||
513 | 470 | frame.bFileAlterPreserve = (frame.flags & 0x4000 != 0) | ||
514 | 471 | frame.bReadOnly = (frame.flags & 0x2000 != 0) | ||
515 | 472 | frame.bCompressed = (frame.flags & 0x0080 != 0) | ||
516 | 473 | if frame.bCompressed: | ||
517 | 474 | frame.decompressedSize = self._getInteger(self._readBytes(4, 'decompsize')) | ||
518 | 475 | cbData -= 4 | ||
519 | 476 | #if _c: _coverage('compress') | ||
520 | 477 | frame.bEncrypted = (frame.flags & 0x0040 != 0) | ||
521 | 478 | if frame.bEncrypted: | ||
522 | 479 | frame.encryptionMethod = self._readBytes(1, 'encrmethod') | ||
523 | 480 | cbData -= 1 | ||
524 | 481 | #if _c: _coverage('encrypt') | ||
525 | 482 | frame.bInGroup = (frame.flags & 0x0020 != 0) | ||
526 | 483 | if frame.bInGroup: | ||
527 | 484 | frame.groupid = self._readBytes(1, 'groupid') | ||
528 | 485 | cbData -= 1 | ||
529 | 486 | #if _c: _coverage('groupid') | ||
530 | 487 | |||
531 | 488 | frame.rawData = self._readBytes(cbData, 'rev3data') | ||
532 | 489 | return frame | ||
533 | 490 | |||
534 | 491 | def _readFrame_rev4(self): | ||
535 | 492 | """ Read a frame for ID3v2.4: four-byte ids and lengths. | ||
536 | 493 | """ | ||
537 | 494 | if self.bytesLeft < 10: | ||
538 | 495 | return None | ||
539 | 496 | id = self._readBytes(4,'rev4id') | ||
540 | 497 | if len(id) < 4 or not self._isValidId(id): | ||
541 | 498 | self._unreadBytes(len(id)) | ||
542 | 499 | return None | ||
543 | 500 | hstuff = struct.unpack('!BBBBh', self._readBytes(6,'rev4head')) | ||
544 | 501 | frame = _Frame() | ||
545 | 502 | frame.id = id | ||
546 | 503 | frame.size = self._getSyncSafeInt(hstuff[0:4]) | ||
547 | 504 | cbData = frame.size | ||
548 | 505 | frame.flags = hstuff[4] | ||
549 | 506 | frame.bTagAlterPreserve = (frame.flags & 0x4000 != 0) | ||
550 | 507 | frame.bFileAlterPreserve = (frame.flags & 0x2000 != 0) | ||
551 | 508 | frame.bReadOnly = (frame.flags & 0x1000 != 0) | ||
552 | 509 | frame.bInGroup = (frame.flags & 0x0040 != 0) | ||
553 | 510 | if frame.bInGroup: | ||
554 | 511 | frame.groupid = self._readBytes(1, 'groupid') | ||
555 | 512 | cbData -= 1 | ||
556 | 513 | #if _c: _coverage('groupid') | ||
557 | 514 | |||
558 | 515 | frame.bCompressed = (frame.flags & 0x0008 != 0) | ||
559 | 516 | if frame.bCompressed: | ||
560 | 517 | #if _c: _coverage('compress') | ||
561 | 518 | pass | ||
562 | 519 | frame.bEncrypted = (frame.flags & 0x0004 != 0) | ||
563 | 520 | if frame.bEncrypted: | ||
564 | 521 | frame.encryptionMethod = self._readBytes(1, 'encrmethod') | ||
565 | 522 | cbData -= 1 | ||
566 | 523 | #if _c: _coverage('encrypt') | ||
567 | 524 | frame.bUnsynchronized = (frame.flags & 0x0002 != 0) | ||
568 | 525 | if frame.bUnsynchronized: | ||
569 | 526 | #if _c: _coverage('unsyncframe') | ||
570 | 527 | pass | ||
571 | 528 | if frame.flags & 0x0001: | ||
572 | 529 | frame.datalen = self._getSyncSafeInt(self._readBytes(4, 'datalen')) | ||
573 | 530 | cbData -= 4 | ||
574 | 531 | #if _c: _coverage('datalenindic') | ||
575 | 532 | |||
576 | 533 | frame.rawData = self._readBytes(cbData, 'rev3data') | ||
577 | 534 | |||
578 | 535 | return frame | ||
579 | 536 | |||
580 | 537 | def getValue(self, id): | ||
581 | 538 | """ Return the value for an ID3 tag id, or for a | ||
582 | 539 | convenience label ('title', 'performer', ...), | ||
583 | 540 | or return None if there is no such value. | ||
584 | 541 | """ | ||
585 | 542 | if self.frames.has_key(id): | ||
586 | 543 | if hasattr(self.frames[id], 'value'): | ||
587 | 544 | return self.frames[id].value | ||
588 | 545 | if _simpleDataMapping.has_key(id): | ||
589 | 546 | for id2 in _simpleDataMapping[id]: | ||
590 | 547 | v = self.getValue(id2) | ||
591 | 548 | if v: | ||
592 | 549 | return v | ||
593 | 550 | return None | ||
594 | 551 | |||
595 | 552 | def getRawData(self, id): | ||
596 | 553 | if self.frames.has_key(id): | ||
597 | 554 | return self.frames[id].rawData | ||
598 | 555 | return None | ||
599 | 556 | |||
600 | 557 | def dump(self): | ||
601 | 558 | import pprint | ||
602 | 559 | print "Header:" | ||
603 | 560 | print self.header | ||
604 | 561 | print "Frames:" | ||
605 | 562 | for fr in self.allFrames: | ||
606 | 563 | if len(fr.rawData) > 30: | ||
607 | 564 | fr.rawData = fr.rawData[:30] | ||
608 | 565 | pprint.pprint(self.allFrames) | ||
609 | 566 | for fr in self.allFrames: | ||
610 | 567 | if hasattr(fr, 'value'): | ||
611 | 568 | print '%s: %s' % (fr.id, _safestr(fr.value)) | ||
612 | 569 | else: | ||
613 | 570 | print '%s= %s' % (fr.id, _safestr(fr.rawData)) | ||
614 | 571 | for label in _simpleDataMapping.keys(): | ||
615 | 572 | v = self.getValue(label) | ||
616 | 573 | if v: | ||
617 | 574 | print 'Label %s: %s' % (label, _safestr(v)) | ||
618 | 575 | |||
619 | 576 | def dumpCoverage(self): | ||
620 | 577 | feats = _features.keys() | ||
621 | 578 | feats.sort() | ||
622 | 579 | for feat in feats: | ||
623 | 580 | print "Feature %-12s: %d" % (feat, _features[feat]) | ||
624 | 581 | |||
625 | 582 | if __name__ == '__main__': | ||
626 | 583 | if len(sys.argv) < 2 or '-?' in sys.argv: | ||
627 | 584 | print "Give me a filename" | ||
628 | 585 | else: | ||
629 | 586 | id3 = Reader(sys.argv[1]) | ||
630 | 587 | id3.dump() | ||
631 | 588 | #if _c: id3.dumpCoverage() | ||
632 | 589 | |||
633 | 590 | # History: | ||
634 | 591 | # 20040104: Created. | ||
635 | 592 | # 20040105: Two bugs: didn't read v1 properly, and didn't like empty strings in values. | ||
636 | 593 | # | ||
637 | 594 | # 20040109: Properly reads v2.3 properly (4-byte lens, but not synchsafe) | ||
638 | 595 | # Handles unsynchronized tags properly. | ||
639 | 596 | # | ||
640 | 597 | # 20040110: Total length was wrong for unsynchronized tags. | ||
641 | 598 | # Treat input filename better so path module can be used. | ||
642 | 599 | # Frame ids are more closely scrutinized for validity. | ||
643 | 600 | # Errors are now thrown as our own exception. | ||
644 | 601 | # Pad bytes aren't retained any more. | ||
645 | 602 | # Frame.value is not set if there is no interpretation performed. | ||
646 | 603 | # | ||
647 | 604 | # 20040111: Tracing and code coverage more formalized. | ||
648 | 605 | # Exceptions are now all Id3Error. | ||
649 | 606 | # Zero-length data in frames is handled pleasantly. | ||
650 | 607 | # Compressed frames are decompressed. | ||
651 | 608 | # Extended headers are read (but uninterpreted). | ||
652 | 609 | # Non-zero pad bytes are handled. | ||
653 | 610 | # Frame flags are read and interpreted. | ||
654 | 611 | # W*** frames are interpreted. | ||
655 | 612 | # Multi-string frames set .value to a list of strings. | ||
656 | 613 | # | ||
657 | 614 | # 20040113: Strip all trailing zero bytes from text strings. | ||
658 | 615 | # If we opened the file, we should close the file. | ||
659 | 616 | # | ||
660 | 617 | # 20040205: Do a better job printing strings without throwing. | ||
661 | 618 | # Support genre information, even if it is stupid. | ||
662 | 619 | # | ||
663 | 620 | # 20040913: When dumping strings, be more robust when trying to print | ||
664 | 621 | # non-character data. Thanks to Duane Harkness for the fix. | ||
665 | 622 | # | ||
666 | 623 | # 20061230: Fix ommission of self. in a few places. | ||
667 | 624 | # | ||
668 | 625 | # 20070415: Extended headers in ID3v2.4 weren't skipped properly, throwing | ||
669 | 626 | # everything out of whack. | ||
670 | 627 | # Be more generous about finding album and performer names in the tag. |