Merge lp:~clement-grimal/qarte/update-arte-website into lp:qarte/qarte-3

Proposed by kokoklems
Status: Needs review
Proposed branch: lp:~clement-grimal/qarte/update-arte-website
Merge into: lp:qarte/qarte-3
Diff against target: 117 lines (+27/-18)
1 file modified
artetv.py (+27/-18)
To merge this branch: bzr merge lp:~clement-grimal/qarte/update-arte-website
Reviewer | Review Type | Date Requested | Status
VinsS | | | Pending
Review via email: mp+327223@code.launchpad.net

Description of the change

Fix the JSON parsing; note that the thumbnails are still broken.
I also disabled the remove_expired_videos function, because the JSON field used to get the expiration date is no longer available.

To post a comment you must log in.

Unmerged revisions

181. By kokoklems

Fix fetch of the json data after ARTE website update. Thumbnails and expiration dates are broken!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'artetv.py'
2--- artetv.py 2017-04-26 15:52:49 +0000
3+++ artetv.py 2017-07-11 15:22:11 +0000
4@@ -137,6 +137,8 @@
5 print(video)
6 self.videos.append(TVItem(video))
7
8+ # lgg.warning('DDD :: ' + str(len(self.videos)))
9+
10 for video in self.old_videos:
11 # The two first pages contains some identical videos
12 if not video.idx in idxs:
13@@ -144,7 +146,8 @@
14 idxs.append(video.idx)
15
16 self.indexes = idxs[:]
17- Thread(target=self.remove_expired_videos).start()
18+ # TODO: rights_end field no longer available
19+ # Thread(target=self.remove_expired_videos).start()
20
21 def sort_recents(self):
22 """Sort the recent videos.
23@@ -853,6 +856,7 @@
24
25 def get_next_page(self):
26 url = self.url + next(self.day_before)
27+ # lgg.warning(url)
28 items = self.get_json(self.fetch_page(url))
29 videos = []
30 indexes = []
31@@ -862,36 +866,37 @@
32 if video:
33 videos.append(video)
34 indexes.append(video['id'])
35+ # lgg.warning('D :: ' + str(len(videos)))
36 return videos, indexes
37
38 def get_json(self, content):
39 if content:
40 try:
41- content = content.split('"tvguide":{"broadcasts":')[1]
42- content = content.split('"currentDate":')[0]
43- content = content.lstrip().rstrip(', \n')
44- return json.loads(content)
45+ # lgg.warning(content)
46+ content = content.split('window.__INITIAL_STATE__ = ')[1]
47+ content = content.split(';')[0]
48+ data = json.loads(content)
49+ items = data['page']['zones'][1]['teasers']
50+ # lgg.warning(items)
51+ return items
52 except Exception as why:
53 lgg.warning("Read json error: %s" % why)
54
55 def clean_item(self, item):
56 video = {}
57 is_live = False
58- for v in item["videos"]:
59- if v["kindLabel"] not in ("BANDE-ANNONCE", "EXTRAIT"):
60- is_live = True
61- if not is_live:
62+ # lgg.warning(item)
63+ if item["kind"] in ("BANDE-ANNONCE", "EXTRAIT"):
64 return False
65 try:
66 video["rights_end"] = v["videoRightsEnd"]
67 except:
68 video["rights_end"] = ""
69 try:
70- video['scheduled_on'] = item["broadcastBeginRounded"]
71+ video['scheduled_on'] = item['beginsAt']
72 except:
73 video['scheduled_on'] = ""
74
75- item = item["program"]
76 try:
77 video["url"] = item["url"]
78 video['id'] = item['programId']
79@@ -899,20 +904,22 @@
80 # No url, no video
81 return False
82
83- video["subtitle"] = item.get("shortDescription", "")
84+ video["subtitle"] = item.get("subtitle", "")
85 video["headerText"] = item.get("headerText", "")
86 video["title"] = item.get("title", "No title")
87 video["description"] = item.get("fullDescription", "")
88- video["duration"] = item.get("durationSeconds", "")
89+ video["duration"] = item.get("duration", 0)
90 video["teaser"] = item.get("teaserText", "")
91 video["programId"] = item.get("programId", "")
92 video['adult'] = None
93 video['views'] = 0
94- thumb = item.get("mainImage", "")
95- if thumb:
96- video["thumbnail_url"] = thumb.get("url", "")
97- video["credit"] = thumb.get("copyright", "")
98-
99+ try:
100+ video["thumbnail_url"] = item['images'][-1]['url']
101+ video["credit"] = '' #item['images'][-1]['copyright']
102+ except:
103+ video["thumbnail_url"] = ''
104+ video["credit"] = ''
105+
106 return video
107
108
109@@ -971,6 +978,8 @@
110 return datetime.now()
111
112 def format_duration(self, value):
113+ if not value:
114+ value = 0
115 min_, sec = divmod(value, 60)
116 if min_ < 60:
117 return "%s min. %s sec." %(min_, sec)

Subscribers

People subscribed via source and target branches

to all changes: