Merge lp:~dholbach/harvest/581732 into lp:harvest

Proposed by Daniel Holbach on 2010-11-16
Status: Superseded
Proposed branch: lp:~dholbach/harvest/581732
Merge into: lp:harvest
Diff against target: 113 lines (+43/-20)
3 files modified
harvest/common/opportunity_lists.py (+35/-8)
harvest/opportunities/management/commands/updatelists.py (+2/-6)
harvest/opportunities/management/commands/updateopportunities.py (+6/-6)
To merge this branch: bzr merge lp:~dholbach/harvest/581732
Reviewer | Review Type | Date Requested | Status
James Westby | | 2010-11-22 | Approve on 2010-12-01
Dave Walker | | 2010-11-22 | Pending
Dylan McCall | | 2010-11-22 | Pending
harvest-dev | | 2010-11-16 | Pending
Review via email: mp+40950@code.launchpad.net

This proposal supersedes a proposal from 2010-11-16.

This proposal has been superseded by a proposal from 2010-12-01.

To post a comment you must log in.
Daniel Holbach (dholbach) wrote :

Is there anybody out there?

James Westby (james-w) wrote :

83 + for extension in [ ".csv", ".csv.gz", ".cgi", ".json", ".json.gz" ]:
84 + if filename.endswith(extension):
85 + return filename.split(extension)[0]

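There's a tiny chance of a bug here, but it's not new. This will return the wrong name if
the filename is something.csv.csv or similar, because split() cuts at the first occurrence of
the extension rather than the last. Making it filename.rsplit(extension, 1) would fix that
(rsplit without a maxsplit argument still splits at every occurrence, just like split).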

Thanks,

James

review: Approve
lp:~dholbach/harvest/581732 updated on 2010-12-01
289. By Daniel Holbach on 2010-12-01

use rsplit to split off extension properly

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'harvest/common/opportunity_lists.py'
2--- harvest/common/opportunity_lists.py 2010-10-15 11:54:34 +0000
3+++ harvest/common/opportunity_lists.py 2010-12-01 15:32:23 +0000
4@@ -5,6 +5,35 @@
5 import csv
6 import os
7
8+def read_csv(url, sock):
9+ if url.endswith(".csv.gz"):
10+ import StringIO
11+ import gzip
12+ data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
13+ else:
14+ data = sock.readlines()
15+ lines = filter(lambda a: a.strip()!="", data)
16+ return [map(unicode, [a for a in l]) for l in csv.reader(lines)]
17+
18+def read_json(url, sock):
19+ import json
20+ if url.endswith(".json.gz"):
21+ import StringIO
22+ import gzip
23+ data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).read()
24+ else:
25+ data = sock.read()
26+ entries = json.loads(data)
27+ return entries
28+
29+def convert_from_csv(entries):
30+ data = []
31+ for entry in entries:
32+ data += [{"source_package": entry[0],
33+ "link": entry[1],
34+ "short_description": entry[2]}]
35+ return data
36+
37 def read_entries(url, last_updated):
38 import time
39 import datetime
40@@ -18,6 +47,7 @@
41 return [None, None]
42
43 datetime_lm = None
44+ data = None
45 if sock.info().has_key("Last-Modified") and sock.info()["Last-Modified"]:
46 lm_string = sock.info()["Last-Modified"]
47 if lm_string:
48@@ -28,15 +58,12 @@
49 sock.close()
50 return [None,None]
51
52- if url.endswith(".csv.gz"):
53- import StringIO
54- import gzip
55- data = gzip.GzipFile(fileobj=StringIO.StringIO(sock.read())).readlines()
56- else:
57- data = sock.readlines()
58- lines = filter(lambda a: a.strip()!="", data)
59+ if url.endswith(".csv") or url.endswith(".csv.gz"):
60+ data = convert_from_csv(read_csv(url, sock))
61+ if url.endswith(".json") or url.endswith(".json.gz"):
62+ data = read_json(url, sock)
63 sock.close()
64- return ([map(unicode, [a for a in l]) for l in csv.reader(lines)], datetime_lm)
65+ return (data, datetime_lm)
66
67 def unify_list_entry(entry):
68 for e in entry:
69
70=== modified file 'harvest/opportunities/management/commands/updatelists.py'
71--- harvest/opportunities/management/commands/updatelists.py 2010-11-02 15:51:05 +0000
72+++ harvest/opportunities/management/commands/updatelists.py 2010-12-01 15:32:23 +0000
73@@ -41,12 +41,8 @@
74
75 def chop_name(self, list_url):
76 filename = os.path.basename(list_url)
77- if filename.endswith(".csv"):
78- return filename.split(".csv")[0]
79- if filename.endswith(".csv.gz"):
80- return filename.split(".csv.gz")[0]
81- if filename.endswith(".cgi"):
82- return filename.split(".cgi")[0]
83+ for extension in [ ".csv", ".csv.gz", ".cgi", ".json", ".json.gz" ]:
84+ filename = filename.rsplit(extension)[0]
85 return filename
86
87 def handle_noargs(self, **options):
88
89=== modified file 'harvest/opportunities/management/commands/updateopportunities.py'
90--- harvest/opportunities/management/commands/updateopportunities.py 2010-10-11 14:45:14 +0000
91+++ harvest/opportunities/management/commands/updateopportunities.py 2010-12-01 15:32:23 +0000
92@@ -22,15 +22,15 @@
93 if entries:
94 logger.debug("Has entries and was updated at %s." % new_date)
95 op_list.last_updated = new_date
96- for (sourcepackage, url, description) in filter(lambda a: len(a)==3, entries):
97- sp, created = models.SourcePackage.objects.get_or_create(name=sourcepackage)
98+ for entry in entries:
99+ sp, created = models.SourcePackage.objects.get_or_create(name=entry["source_package"])
100 try:
101- opportunity = models.Opportunity.objects.get(description=description,
102- url=url, sourcepackage=sp,
103+ opportunity = models.Opportunity.objects.get(description=entry["short_description"],
104+ url=entry["link"], sourcepackage=sp,
105 opportunitylist=op_list)
106 except models.Opportunity.DoesNotExist:
107- opportunity = models.Opportunity(description=description,
108- url=url, sourcepackage=sp,
109+ opportunity = models.Opportunity(description=entry["short_description"],
110+ url=entry["link"], sourcepackage=sp,
111 since=op_list.last_updated,
112 opportunitylist=op_list,
113 experience=op_list.experience)

Subscribers

People subscribed via source and target branches

to all changes: