Merge lp:~renamer-developers/renamer/improve-tv-filename-parser into lp:renamer

Proposed by Jonathan Jacobs
Status: Merged
Approved by: Tristan Seligmann
Approved revision: 89
Merged at revision: 86
Proposed branch: lp:~renamer-developers/renamer/improve-tv-filename-parser
Merge into: lp:renamer
Diff against target: 399 lines (+221/-99)
4 files modified
DEPS (+1/-1)
renamer/plugins/tv.py (+116/-70)
renamer/test/test_tvrage.py (+75/-28)
scripts/generate_compiled_grammars (+29/-0)
To merge this branch: bzr merge lp:~renamer-developers/renamer/improve-tv-filename-parser
Reviewer Review Type Date Requested Status
Tristan Seligmann Approve
Review via email: mp+39383@code.launchpad.net

Description of the change

Replace PyParsing with PyMeta and implement metadata overrides for the "tvrage" command.

I'm wondering whether I should put the grammar in a separate file and commit the result of the "generate_parser" script to the repo; the advantage would be improved import times.

To post a comment you must log in.
Revision history for this message
Tristan Seligmann (mithrandi) :
review: Approve
90. By Jonathan Jacobs

Fix misnamed test method.

91. By Jonathan Jacobs

Use precompiled filename grammar when available.

92. By Jonathan Jacobs

Script to generate precompiled grammar.

93. By Jonathan Jacobs

Damnit, pyflakes!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'DEPS'
2--- DEPS 2010-10-24 11:06:45 +0000
3+++ DEPS 2010-10-27 00:51:02 +0000
4@@ -1,6 +1,6 @@
5 Axiom 0.6.0
6 Epsilon 0.5.0
7 Mutagen 1.15
8-PyParsing 1.5.1
9+PyMeta 0.4.0
10 Python 2.5
11 Twisted 2.5.0
12
13=== modified file 'renamer/plugins/tv.py'
14--- renamer/plugins/tv.py 2010-10-15 08:51:27 +0000
15+++ renamer/plugins/tv.py 2010-10-27 00:51:02 +0000
16@@ -1,20 +1,77 @@
17 import string
18 import urllib
19
20+try:
21+ import pymeta
22+ from pymeta.grammar import OMeta
23+ from pymeta.runtime import ParseError
24+ pymeta # Ssssh, Pyflakes.
25+except ImportError:
26+ pymeta = None
27+
28 from twisted.web.client import getPage
29
30-try:
31- import pyparsing
32- from pyparsing import (
33- alphanums, nums, Word, Literal, ParseException, SkipTo, FollowedBy,
34- ZeroOrMore, Combine, NotAny, Optional, StringEnd)
35- pyparsing # Ssssh, Pyflakes.
36-except ImportError:
37- pyparsing = None
38-
39 from renamer import logging
40 from renamer.plugin import RenamingCommand
41 from renamer.errors import PluginError
42+try:
43+ from renamer._compiled_grammar.tv import Parser as FilenameGrammar
44+ FilenameGrammar # Ssssh, Pyflakes.
45+except ImportError:
46+ FilenameGrammar = None
47+
48+
49+
50+filenameGrammar = """
51+complete_strict ::= <series_strict>:series <separator> <episode_strict>:episode
52+ => series, episode
53+complete_lenient ::= <series_lenient>:series <separator> <episode_lenient>:episode
54+ => series, episode
55+partial_silly ::= <series_silly>:series <separator> <episode_silly>:episode
56+ => series, episode
57+only_episode_silly ::= <episode_silly>:episode
58+ => None, episode
59+only_episode ::= <episode_strict>:episode
60+ => None, episode
61+only_series ::= (<series_word>:word <separator> => word)+:words
62+ => ' '.join(words), [None, None]
63+
64+separator ::= <hard_separator> | <soft_separator>
65+soft_separator ::= '.' | ' ' | '-' | '_'
66+hard_separator ::= ('_' '-' '_'
67+ |' ' '-' ' '
68+ |'.' '-' '.')
69+
70+series_strict ::= (<series_word>:word <separator> ~(<episode_strict> <separator>) => word)*:words <series_word>:word
71+ => ' '.join(words + [word])
72+series_lenient ::= (<series_word>:word <separator> ~(<episode_lenient> <separator>) => word)*:words <series_word>:word
73+ => ' '.join(words + [word])
74+series_silly ::= (<series_word>:word <soft_separator> ~(<episode_silly> <separator>) => word)*:words <separator>
75+ => ' '.join(words)
76+series_word ::= (<letter> | <digit>)+:name => ''.join(name)
77+
78+episode_strict ::= (<episode_x> | <episode_x2> | <episode_lettered>):ep
79+ => map(''.join, ep)
80+episode_lenient ::= (<episode_strict> | <episode_numbers>):ep
81+ => map(''.join, ep)
82+episode_silly ::= <digit>+:ep
83+ => map(''.join, [ep, ep])
84+
85+episode_lettered ::= ('S' | 's') <digit>+:season ('E' | 'e') <digit>+:episode
86+ => season, episode
87+episode_numbers ::= <digit>:a <digit>:b <digit>:c <digit>?:d
88+ => ([a, b], [c, d]) if d else ([a], [b, c])
89+episode_x ::= <digit>+:season 'x' <digit>+:episode
90+ => season, episode
91+episode_x2 ::= '[' <digit>+:season 'x' <digit>+:episode ']'
92+ => season, episode
93+"""
94+
95+
96+
97+if FilenameGrammar is None:
98+ class FilenameGrammar(OMeta.makeGrammar(filenameGrammar, globals())):
99+ pass
100
101
102
103@@ -39,56 +96,16 @@
104 '$series [${season}x${padded_episode}] - $title')
105
106
107+ optParameters = [
108+ ('series', None, None, 'Override series name.'),
109+ ('season', None, None, 'Override season number.', int),
110+ ('episode', None, None, 'Override episode number.', int)]
111+
112+
113 def postOptions(self):
114- self.filenameParser = self._createParser()
115-
116-
117- def _createParser(self):
118- """
119- Create the filename parser.
120- """
121- if pyparsing is None:
122+ if pymeta is None:
123 raise PluginError(
124- 'The "pyparsing" package is required for this command')
125-
126- def L(value):
127- return Literal(value).suppress()
128-
129- number = Word(nums)
130- digit = Word(nums, exact=1)
131-
132- separator = ( Literal('_-_')
133- | Literal(' - ')
134- | Literal('.-.')
135- | Literal('-')
136- | Literal('.')
137- | Literal('_')
138- | Literal(' '))
139- separator = separator.suppress().leaveWhitespace()
140-
141- season = number.setResultsName('season')
142- exact_season = Word(nums, exact=2).setResultsName('season')
143- short_season = digit.setResultsName('season')
144- epnum = number.setResultsName('ep')
145- exact_epnum = Word(nums, exact=2).setResultsName('ep')
146- episode = ( season + L('x') + epnum
147- | L('[') + season + L('x') + epnum + L(']')
148- | L('S') + season + L('E') + epnum
149- | L('s') + season + L('e') + epnum
150- | exact_season + exact_epnum
151- | short_season + exact_epnum)
152-
153- series_word = Word(alphanums)
154- series = ZeroOrMore(
155- series_word + separator + NotAny(episode + separator)) + series_word
156- series = Combine(series, joinString=' ').setResultsName('series_name')
157-
158- extension = '.' + Word(alphanums).setResultsName('ext') + StringEnd()
159-
160- title = SkipTo(FollowedBy(extension))
161-
162- return (series + separator + episode + Optional(separator + title) +
163- extension)
164+ 'The "pymeta" package is required for this command')
165
166
167 def buildMapping(self, (seriesName, season, episode, episodeName)):
168@@ -101,20 +118,49 @@
169 title=episodeName)
170
171
172- def extractParts(self, filename):
173+ def extractParts(self, filename, overrides=None):
174 """
175 Get TV episode information from a filename.
176 """
177- try:
178- parse = self.filenameParser.parseString(filename)
179- except ParseException, e:
180- raise PluginError(
181- 'No patterns could be found in "%s" (%r)' % (filename, e))
182- else:
183- parts = parse.series_name, parse.season, parse.ep, parse.ext
184- logging.msg('Found parts in "%s": %r' % (filename, parts),
185- verbosity=4)
186- return parts
187+ if overrides is None:
188+ overrides = {}
189+
190+ rules = ['complete_strict', 'complete_lenient']
191+ # We can only try the partial rules if there are some overrides.
192+ if filter(None, overrides.values()):
193+ rules.extend([
194+ 'only_episode',
195+ 'partial_silly',
196+ 'only_series',
197+ 'only_episode_silly'])
198+
199+ for rule in rules:
200+ g = FilenameGrammar(filename)
201+ logging.msg('Trying grammar rule "%s"' % (rule,),
202+ verbosity=5)
203+ try:
204+ res, err = g.apply(rule)
205+ except ParseError, e:
206+ try:
207+ logging.msg('Parsing error:', verbosity=5)
208+ for line in (e.formatError(filename).strip()).splitlines():
209+ logging.msg(line, verbosity=5)
210+ except:
211+ pass
212+ continue
213+ else:
214+ series, (season, episode) = res
215+ parts = (
216+ overrides.get('series') or series,
217+ overrides.get('season') or season,
218+ overrides.get('episode') or episode)
219+ if None not in parts:
220+ logging.msg('Found parts in "%s": %r' % (filename, parts),
221+ verbosity=4)
222+ return parts
223+
224+ raise PluginError(
225+ 'No patterns could be found in "%s"' % (filename))
226
227
228 def extractMetadata(self, pageData):
229@@ -147,8 +193,8 @@
230 # IRenamerCommand
231
232 def processArgument(self, arg):
233- # XXX: why does our pattern care about the extension?
234- seriesName, season, episode, ext = self.extractParts(arg.basename())
235+ seriesName, season, episode = self.extractParts(
236+ arg.basename(), overrides=self)
237 d = self.lookupMetadata(seriesName, season, episode)
238 d.addCallback(self.buildMapping)
239 return d
240
241=== modified file 'renamer/test/test_tvrage.py'
242--- renamer/test/test_tvrage.py 2010-10-01 15:45:57 +0000
243+++ renamer/test/test_tvrage.py 2010-10-27 00:51:02 +0000
244@@ -22,27 +22,25 @@
245 Tests for L{renamer.plugins.tv.TVRage}.
246 """
247 cases = [
248- ('Profiler - S01E01 - Insight.avi', 'Profiler', '01', '01', 'avi'),
249- ('Heroes [1x01] - Genesis.avi', 'Heroes', '1', '01', 'avi'),
250- ('Heroes S01E10 HDTV XviD.avi', 'Heroes', '01', '10', 'avi'),
251- ('heroes.108.hdtv-lol.avi', 'heroes', '1', '08', 'avi'),
252- ('arrested.development.302.avi', 'arrested development', '3', '02', 'avi'),
253- ('Heroes.S01E11.HDTV.XviD-K4RM4.avi', 'Heroes', '01', '11', 'avi'),
254- ('How I Met Your Mother - 101 - Pilot.avi', 'How I Met Your Mother', '1', '01', 'avi'),
255- ('24.s6e4.dvdrip.xvid-aerial.avi', '24', '6', '4', 'avi'),
256- ('harsh.realm.-.1x01.-.pilot.avi', 'harsh realm', '1', '01', 'avi'),
257- ('DayBreak_S01E09.avi', 'DayBreak', '01', '09', 'avi'),
258- ('Xena - 2x05 - Return of Callisto.avi', 'Xena', '2', '05', 'avi'),
259- ('Sliders_-_4x22_Revelations_(divx).avi', 'Sliders', '4', '22', 'avi'),
260- ('Xena_4x02_Adventures In The Sin Trade - Part 2.avi', 'Xena', '4', '02', 'avi'),
261- ('Sliders 501 - The Unstuck Man.avi', 'Sliders', '5', '01', 'avi'),
262- ('buffy.2x03.dvdrip.xvid-tns.avi', 'buffy', '2', '03', 'avi'),
263- # XXX: This is broken and probably has been for a long time, it would
264- # be nice if it worked again.
265- #('the.4400.1x05.avi', 'the 4400', '1', '05', 'avi'),
266- # This should work, but doesn't.
267- #('flash.gordon.2007.s01e02.dvdrip.xvid-reward.avi', 'flash gordon 2007', '01', '02', 'avi'),
268- ('ReGenesis - 1x13.avi', 'ReGenesis', '1', '13', 'avi')]
269+ ('Profiler - S01E01 - Insight.avi', 'Profiler', '01', '01'),
270+ ('Heroes [1x01] - Genesis.avi', 'Heroes', '1', '01'),
271+ ('Heroes S01E10 HDTV XviD.avi', 'Heroes', '01', '10'),
272+ ('heroes.108.hdtv-lol.avi', 'heroes', '1', '08'),
273+ ('arrested.development.302.avi', 'arrested development', '3', '02'),
274+ ('Heroes.S01E11.HDTV.XviD-K4RM4.avi', 'Heroes', '01', '11'),
275+ ('How I Met Your Mother - 101 - Pilot.avi', 'How I Met Your Mother', '1', '01'),
276+ ('24.s6e4.dvdrip.xvid-aerial.avi', '24', '6', '4'),
277+ ('harsh.realm.-.1x01.-.pilot.avi', 'harsh realm', '1', '01'),
278+ ('DayBreak_S01E09.avi', 'DayBreak', '01', '09'),
279+ ('Xena - 2x05 - Return of Callisto.avi', 'Xena', '2', '05'),
280+ ('Sliders_-_4x22_Revelations_(divx).avi', 'Sliders', '4', '22'),
281+ ('Xena_4x02_Adventures In The Sin Trade - Part 2.avi', 'Xena', '4', '02'),
282+ ('Sliders 501 - The Unstuck Man.avi', 'Sliders', '5', '01'),
283+ ('buffy.2x03.dvdrip.xvid-tns.avi', 'buffy', '2', '03'),
284+ ('the.4400.1x05.avi', 'the 4400', '1', '05'),
285+ ('flash.gordon.2007.s01e02.dvdrip.xvid-reward.avi', 'flash gordon 2007', '01', '02'),
286+ ('Foo - 508 - The cat has 9 lives.avi', 'Foo', '5', '08'),
287+ ('ReGenesis - 1x13.avi', 'ReGenesis', '1', '13')]
288
289
290 def setUp(self):
291@@ -71,17 +69,66 @@
292 self.plugin.extractParts, 'thiswillnotwork')
293
294
295- def test_missingPyParsing(self):
296- """
297- Attempting to use the TV Rage plugin without PyParsing installed raises
298- a L{renamer.errors.PluginError}.
299- """
300- self.patch(tv, 'pyparsing', None)
301+ def test_extractPartsWithOverrides(self):
302+ """
303+ Override parts take preference when extracting TV show information from
304+ filenames.
305+ """
306+ overrideCases = [
307+ dict(series='House'),
308+ dict(season=51),
309+ dict(episode=99),
310+ dict(series='House', season=51),
311+ dict(series='House', episode=99),
312+ dict(series='House', season=51, episode=99)]
313+
314+ for overrides in overrideCases:
315+ for case in self.cases:
316+ expected = (
317+ overrides.get('series', case[1]),
318+ overrides.get('season', case[2]),
319+ overrides.get('episode', case[3]))
320+ self.assertEquals(
321+ self.plugin.extractParts(case[0], overrides=overrides),
322+ expected)
323+
324+
325+ def test_extractPartsPartial(self):
326+ """
327+ When providing overrides partial information can be extracted from
328+ filenames and combined with the override values.
329+ """
330+ cases = [
331+ ('s01e02.avi', 'House', '01', '02', dict(series='House')),
332+ ('House - 1.avi', 'House', 3, '1', dict(season=3)),
333+ ('Arrested Development - 1.avi', 'Arrested Development', '1', 2, dict(episode=2)),
334+ ('Chuck.avi', 'Chuck', 1, 2, dict(season=1, episode=2)),
335+ ('Stargate SG1.avi', 'Stargate SG1', 1, 2, dict(season=1, episode=2)),
336+ ('How I Met Your Mother.avi', 'How I Met Your Mother', 1, 2, dict(season=1, episode=2)),
337+ ('1.avi', 'House', 1, '1', dict(series='House', season=1)),
338+ ('1.avi', 'House', '1', 2, dict(series='House', episode=2))]
339+
340+ self.assertRaises(errors.PluginError,
341+ self.plugin.extractParts, 's01e01.avi')
342+
343+ for filename, series, season, episode, overrides in cases:
344+ self.plugin.extractParts(filename, overrides=overrides),
345+ self.assertEquals(
346+ self.plugin.extractParts(filename, overrides=overrides),
347+ (series, season, episode))
348+
349+
350+ def test_missingPyMeta(self):
351+ """
352+ Attempting to use the TV Rage plugin without PyMeta installed raises a
353+ L{renamer.errors.PluginError}.
354+ """
355+ self.patch(tv, 'pymeta', None)
356 plugin = tv.TVRage()
357 plugin.parent = DummyPluginParent()
358 e = self.assertRaises(errors.PluginError, plugin.postOptions)
359 self.assertEquals(
360- str(e), 'The "pyparsing" package is required for this command')
361+ str(e), 'The "pymeta" package is required for this command')
362
363
364 def test_extractMetadata(self):
365
366=== added directory 'scripts'
367=== added file 'scripts/generate_compiled_grammars'
368--- scripts/generate_compiled_grammars 1970-01-01 00:00:00 +0000
369+++ scripts/generate_compiled_grammars 2010-10-27 00:51:02 +0000
370@@ -0,0 +1,29 @@
371+#!/usr/bin/env zsh -i
372+
373+set -e
374+
375+COMPILE_PATH=renamer/_compiled_grammar
376+
377+if [ -d $COMPILE_PATH ]; then
378+ echo $COMPILE_PATH already exists
379+ exit 1
380+fi
381+
382+mkdir -p $COMPILE_PATH
383+touch $COMPILE_PATH/__init__.py
384+
385+function namedAny() {
386+ python -c "from twisted.python.reflect import namedAny; print namedAny('$1')"
387+ return
388+}
389+
390+typeset -A FQPNS
391+FQPNS=(
392+ tv renamer.plugins.tv.filenameGrammar)
393+
394+for key in ${(k)FQPNS}; do
395+ in=$FQPNS[$key]
396+ out=$COMPILE_PATH/${key}.py
397+ echo Generating $out from $in
398+ namedAny $in | generate_parser - - > $out
399+done;

Subscribers

People subscribed via source and target branches