Merge lp:~laurentf/arte+7recorder/arte+7recorder into lp:arte+7recorder

Proposed by beudbeud
Status: Merged
Merged at revision: 50
Proposed branch: lp:~laurentf/arte+7recorder/arte+7recorder
Merge into: lp:arte+7recorder
Diff against target: 211 lines (+115/-25)
2 files modified
arte7recorder/Catalog.py (+2/-0)
arte7recorder/arte7recorder.py (+113/-25)
To merge this branch: bzr merge lp:~laurentf/arte+7recorder/arte+7recorder
Reviewer: beudbeud (review: Approve)
Review via email: mp+195134@code.launchpad.net
Revision history for this message
beudbeud (beudbeud):
review: Approve

Preview Diff

=== modified file 'arte7recorder/Catalog.py'
--- arte7recorder/Catalog.py 2010-10-06 09:25:40 +0000
+++ arte7recorder/Catalog.py 2013-11-13 20:38:32 +0000
@@ -93,6 +93,8 @@
             video = dict()
             for h in i.findAll('h2'):
                 for a in h.findAll('a'):
+                    if a.string == None:
+                        continue
                     video['targetURL'] = self.ARTE_WEB_ROOT + a['href']
                     video['targetURL'] = video['targetURL'].replace("/fr/", lang)
                     video['bigTitle'] = unescape_html( a.string )

=== modified file 'arte7recorder/arte7recorder.py'
--- arte7recorder/arte7recorder.py 2010-10-06 09:25:40 +0000
+++ arte7recorder/arte7recorder.py 2013-11-13 20:38:32 +0000
@@ -16,6 +16,7 @@
 import gettext
 import pygtk
 import BeautifulSoup as BS
+import json
 pygtk.require('2.0')
 
 from Catalog import Catalog, unescape_html, get_lang
@@ -39,18 +40,94 @@
 def get_rtmp_url( url_page, quality ):
     page_soup = BS.BeautifulSoup( urllib2.urlopen(url_page).read() )
 
-    movie_object = page_soup.find("object", classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000")
-    movie = movie_object.find("param", {"name":"movie"})
-    movie_url = "http" + unescape_xml(movie['value'].split("http")[-1])
-
-    xml_soup = BS.BeautifulStoneSoup( urllib2.urlopen(movie_url).read() )
-    movie_url = xml_soup.find("video", {'lang': get_lang()})['ref']
-
-    xml_soup = BS.BeautifulStoneSoup( urllib2.urlopen(movie_url).read() )
-    base_soup = xml_soup.find("urls")
-    movie_url = base_soup.find("url", {"quality": quality}).string
+    vc = page_soup.find("div", {"class":"video-container"})
+    vc_url = vc['arte_vp_url']
+    #json_soup = BS.BeautifulStoneSoup
+    json_tree = json.load( urllib2.urlopen(vc_url) )
+    #obj = json_tree["videoJsonPlayer"]["VSR"]["RTMP_SQ_1"]
+    #movie_url = obj["streamer"]+obj["url"]
+    obj = json_tree["videoJsonPlayer"]["VSR"]["HTTP_REACH_EQ_1"]
+    movie_url = obj["url"]
+    ##FIXME!!! implement quality and language!!
+    #print "-->",json_tree.videoJsonPlayer.VSR
+    #movie_object = page_soup.find("object", classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000")
+    #movie = movie_object.find("param", {"name":"movie"})
+    #movie_url = "http" + unescape_xml(movie['value'].split("http")[-1])
+
+    #xml_soup = BS.BeautifulStoneSoup( urllib2.urlopen(movie_url).read() )
+    #movie_url = xml_soup.find("video", {'lang': get_lang()})['ref']
+
+    #xml_soup = BS.BeautifulStoneSoup( urllib2.urlopen(movie_url).read() )
+    #base_soup = xml_soup.find("urls")
+    #movie_url = base_soup.find("url", {"quality": quality}).string
     return movie_url
 
+def http_download( link, destination = "/dev/null", try_resume = True, resuming = False ):
+    global subprocess_pid
+    #print link, destination
+    some_dl_done = False
+    need_more_dl = True
+
+    if try_resume and os.path.isfile( destination ):
+        for percent in http_download(link, destination, False, True ):
+            if percent != -1:
+                some_dl_done = True
+                need_more_dl = percent != 100.0
+                yield percent
+            else:
+                break
+
+    max_skip_cnt = 15
+    cmd_dl = 'wget "%s" -O "%s"' % (link, destination)
+    cmd_resume = 'wget -c "%s" -O "%s"' % (link, destination)
+    SECONDS_TO_WAIT = 5
+    #percent_re = re.compile("\((.+)%\)$")
+    percent_re = re.compile("([0-9,]+)%")
+
+    ret_code = None
+    if some_dl_done or resuming:
+        cmd = cmd_resume
+    else:
+        cmd = cmd_dl
+    while need_more_dl:
+        stderr_buff = ""
+        whole_stderr_buff = ""
+        p = subprocess.Popen( cmd, shell=True, stderr=subprocess.PIPE, close_fds=True)
+        subprocess_pid = p.pid + 1
+        while ret_code is None:
+            fds_read, fds_write, fds_exception = select.select([p.stderr],[], [], SECONDS_TO_WAIT)
+            if len(fds_read) == 1:
+                #print p.stderr.read(100),fds_read,fds_write
+                c = p.stderr.read(1)
+                whole_stderr_buff += c
+                if c in ("\n","\r"):
+                    match = percent_re.search( stderr_buff )
+                    if max_skip_cnt == 0:
+                        yield -1.0
+                    if match is not None:
+                        max_skip_cnt = 15
+                        yield float(match.group(1))
+                    else:
+                        max_skip_cnt -= 1
+                    stderr_buff = ""
+                else:
+                    stderr_buff += c
+            ret_code = p.poll()
+        whole_stderr_buff += p.stderr.read()
+        subprocess_pid = None
+        if ret_code == 0:
+            yield 100.0
+            break
+        elif ret_code == 2:
+            cmd = cmd_resume
+        else:
+            print ret_code
+            print whole_stderr_buff
+            print
+            yield -1.0
+        ret_code = None
+
+
 def rtmp_download( link, destination = "/dev/null", try_resume = True, resuming =False ):
     global subprocess_pid
     some_dl_done = False
@@ -125,7 +202,7 @@
     wmvRE = re.compile('availableFormats.*=.*"(.*HQ.*wmv.*)"')
     mmsRE = re.compile('"(mms.*)"')
     resumeRE = re.compile('<p class="text">([^<]*)<')
-    dureeRE = re.compile('[^0-9]*([0-9]+)(mn|min)')
+    dureeRE = re.compile('[^0-9]*([0-9]+) (mn|min)')
 
     def __init__(self):
         self.staticon = gtk.StatusIcon()
@@ -308,13 +385,15 @@
                 break
             url_page = n[2]
             self.nom_emi = n[0]
-            self.nom_fichier = self.nom_emi + "-" + n[1] + '.flv'
+            #self.nom_fichier = self.nom_emi + "-" + n[1] + '.flv'
+            self.nom_fichier = self.nom_emi + "-" + n[1] + '.mp4'
             self.nom_fichier = self.nom_fichier.replace("/", "-")
             self.liststore2.set_value(self.treeiter, 3, _('Download...'))
             try:
                 rtmp_url = get_rtmp_url( url_page, quality = "hd" )
                 signal_fin = False
-                for percent in rtmp_download( rtmp_url, self.directory + "/" + self.nom_fichier.replace("'", "_") ):
+                #for percent in rtmp_download( rtmp_url, self.directory + "/" + self.nom_fichier.replace("'", "_") ):
+                for percent in http_download( rtmp_url, self.directory + "/" + self.nom_fichier.replace("'", "_") ):
                     if percent == -1.0:
                         raise IOError()
                     signal_fin = percent == 100.0
@@ -333,6 +412,7 @@
             self.erreur = self.builder.get_object("error_dialog")
             self.error_text = self.builder.get_object("error_text")
             self.error_text.set_text(_("There are problem with your internet connection"))
+            self.liststore2.set_value(self.treeiter, 3, _('Error while downloading'))
             self.erreur.run()
             self.result = self.erreur.run()
             self.erreur.destroy()
@@ -411,18 +491,26 @@
         page = urllib2.urlopen(data_url).read()
         #data_resume = self.resumeRE.search(page).group(1).replace('\n', '').strip()
         soup = BS.BeautifulSoup( page )
-        base_node = soup.find('div', {"class":"recentTracksCont"})
+        #base_node = soup.find('div', {"class":"recentTracksCont"})
+        base_node = soup.find('meta', {"name":"description"})
         data_resume = u""
-        for i in base_node.findAll('p'):
-            if len(data_resume) != 0:
-                data_resume += "\n"
-            #print data_resume.replace("\n","\\n"), i.string
-            try:
-                data_resume += unescape_html(i.string)
-                if i["class"] == "accroche":
-                    data_resume += "\n"
-            except:
-                pass
+        #for i in base_node.findAll('p'):
+        #    if len(data_resume) != 0:
+        #        data_resume += "\n"
+        #    #print data_resume.replace("\n","\\n"), i.string
+        #    try:
+        #        data_resume += unescape_html(i.string)
+        #        if i["class"] == "accroche":
+        #            data_resume += "\n"
+        #    except:
+        #        pass
+        try:
+            res = base_node.get('content')
+        except:
+            pass
+        if len(data_resume) != 0:
+            data_resume += "\n"
+        data_resume += unescape_html(res)
         self.textbuffer1 = self.builder.get_object("textbuffer1")
         self.textbuffer1.set_text(data_resume)
         data_time = self.dureeRE.search(page).group(1)
@@ -488,7 +576,7 @@
 if __name__ == "__main__":
     catalog = Catalog()
     datalist = open('/tmp/database', 'w')
-    print >> datalist, '\n'.join(['%s;%s;%s;%s' % (video[Catalog.TITLE_TAG], video[Catalog.DATE_TAG], video[Catalog.URL_TAG], video[Catalog.IMAGE_TAG]) for video in catalog.videos])
+    print >> datalist, '\n'.join(['%s;%s;%s;%s' % (video[Catalog.TITLE_TAG], video[Catalog.DATE_TAG], video[Catalog.URL_TAG], video[Catalog.IMAGE_TAG]) for video in catalog.videos if Catalog.TITLE_TAG in video])
     datalist.close()
     app = GUI()
     gtk.main()
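
For reference, the heart of this branch is the switch in get_rtmp_url() from scraping the old Flash <object> markup to reading ARTE's JSON player configuration. Below is a minimal standalone sketch of that lookup path, using the same BeautifulSoup 3 and urllib2 calls as the branch; the function name resolve_video_url is ours, the videoJsonPlayer/VSR/HTTP_REACH_EQ_1 field names are taken straight from the diff, and quality/language selection is still an open FIXME in the branch itself.

import json
import urllib2
import BeautifulSoup as BS

def resolve_video_url(url_page):
    # Hypothetical helper mirroring the branch's new lookup path.
    # Fetch the programme page and locate the player container div,
    # whose arte_vp_url attribute points at the JSON player config.
    page_soup = BS.BeautifulSoup(urllib2.urlopen(url_page).read())
    vc = page_soup.find("div", {"class": "video-container"})
    json_tree = json.load(urllib2.urlopen(vc["arte_vp_url"]))
    # VSR maps stream variants to their URLs; the branch hardcodes
    # the HTTP_REACH_EQ_1 variant rather than picking by quality.
    return json_tree["videoJsonPlayer"]["VSR"]["HTTP_REACH_EQ_1"]["url"]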
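The new http_download() generator drives wget and reports progress by scanning its stderr for percentages, falling back to "wget -c" to resume on exit code 2. Here is a simplified sketch of just the progress-parsing idea, without the resume/retry logic, select() timeout, or skip counter; wget_progress is a hypothetical name, and the byte-at-a-time read is needed because wget redraws its status line with carriage returns rather than newlines.

import re
import subprocess

PERCENT_RE = re.compile("([0-9,]+)%")

def wget_progress(link, destination):
    # wget writes its progress bar to stderr, so pipe that stream.
    p = subprocess.Popen(["wget", link, "-O", destination],
                         stderr=subprocess.PIPE)
    buff = ""
    while True:
        c = p.stderr.read(1)
        if c == "":
            break  # wget exited and closed its stderr
        if c in ("\n", "\r"):
            # End of a (re)drawn status line: look for "NN%" in it.
            match = PERCENT_RE.search(buff)
            if match is not None:
                yield float(match.group(1).replace(",", "."))
            buff = ""
        else:
            buff += c
    yield 100.0 if p.wait() == 0 else -1.0

A caller can treat this the way the branch treats http_download(): iterate, update the UI on each yielded percentage, and take -1.0 as failure.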
