Merge lp:~andrewsomething/dat-overview/lp1189808 into lp:dat-overview

Proposed by Andrew Starr-Bochicchio on 2013-07-10
Status: Merged
Merged at revision: 34
Proposed branch: lp:~andrewsomething/dat-overview/lp1189808
Merge into: lp:dat-overview
Diff against target: 60 lines (+22/-5)
1 file modified
overview/uploads/management/commands/migrate-upload-data.py (+22/-5)
To merge this branch: bzr merge lp:~andrewsomething/dat-overview/lp1189808
Reviewer | Review Type | Date Requested | Status
Daniel Holbach | - | 2013-07-10 | Approve on 2013-07-11
Review via email: mp+174041@code.launchpad.net
To post a comment you must log in.
Daniel Holbach (dholbach) wrote:

Memory usage still goes up considerably during the process (top said 76% on a machine with 1.5G mem + 2G swap), but at least it makes it through a full pass. :-)

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'overview/uploads/management/commands/migrate-upload-data.py'
2--- overview/uploads/management/commands/migrate-upload-data.py 2013-06-11 09:08:16 +0000
3+++ overview/uploads/management/commands/migrate-upload-data.py 2013-07-10 20:21:25 +0000
4@@ -19,7 +19,7 @@
5 latest_entry = Uploads.objects.values(t).latest(t)[t]
6 except ObjectDoesNotExist:
7 latest_entry = None
8- for row in cursor.fetchall():
9+ for row in self.row_iter(cursor):
10 if latest_entry is None or row[0] > latest_entry:
11 if row[4] == row[6] or row[5] == row[7]:
12 spon_email = ''
13@@ -30,10 +30,25 @@
14 uploads = Uploads(timestamp=row[0], release=row[1],
15 package=row[2], version=row[3],
16 name_changer=row[4], email_changer=row[5],
17- name_sponsor=spon_name, email_sponsor=spon_email,
18+ name_sponsor=spon_name,
19+ email_sponsor=spon_email,
20 lpid_changer='' ,lpid_sponsor='')
21- bulk_insert.append(uploads)
22+ if len(bulk_insert) == 1000:
23+ Uploads.objects.bulk_create(bulk_insert)
24+ bulk_insert = []
25+ bulk_insert.append(uploads)
26+ else:
27+ bulk_insert.append(uploads)
28 Uploads.objects.bulk_create(bulk_insert)
29+ cursor.close()
30+
31+ def row_iter(self, cursor, size=1000):
32+ while True:
33+ rows = cursor.fetchmany(size)
34+ if not rows:
35+ break
36+ for row in rows:
37+ yield row
38
39 def add_lpids(self):
40 self.launchpad = lp('d-a-t', anonymous=True, lp_service='production')
41@@ -42,7 +57,8 @@
42 flat=True).distinct()
43 for e in changer_emails:
44 uploads = Uploads.objects.filter(email_changer=e)
45- if uploads[0].lpid_changer == '' and (uploads[0].email_changer not in ('', 'N/A')):
46+ if uploads[0].lpid_changer == '' and (
47+ uploads[0].email_changer not in ('', 'N/A')):
48 lpid = self.email_to_lp(e)
49 if lpid != '':
50 for ul in uploads.filter(lpid_changer=''):
51@@ -58,7 +74,8 @@
52 flat=True).distinct()
53 for e in sponsor_emails:
54 uploads = Uploads.objects.filter(email_sponsor=e)
55- if uploads[0].lpid_sponsor == '' and (uploads[0].email_sponsor not in ('', 'N/A')):
56+ if uploads[0].lpid_sponsor == '' and (
57+ uploads[0].email_sponsor not in ('', 'N/A')):
58 lpid = self.email_to_lp(e)
59 if lpid != '':
60 for ul in uploads.filter(lpid_sponsor=''):

Subscribers

People subscribed via source and target branches