Merge lp:~andrewsomething/dat-overview/use_new_dump into lp:dat-overview

Proposed by Andrew Starr-Bochicchio
Status: Merged
Merged at revision: 23
Proposed branch: lp:~andrewsomething/dat-overview/use_new_dump
Merge into: lp:dat-overview
Diff against target: 142 lines (+35/-76)
2 files modified
overview/uploads/common/udd.py (+0/-64)
overview/uploads/management/commands/get-udd-data.py (+35/-12)
To merge this branch: bzr merge lp:~andrewsomething/dat-overview/use_new_dump
Reviewer Review Type Date Requested Status
Daniel Holbach Approve
Review via email: mp+168573@code.launchpad.net

Description of the change

This uses the new smaller udd dump. This fixes lp: #1188634, as we don't need to read it line by line to "extract_good_parts". I dropped the code that checks whether or not the file needs updating. Running 'manage.py update-all' does its own lock/stamp file dance, so there's no need to bother with it there as well.

I had to start calling "CREATE EXTENSION IF NOT EXISTS debversion" in the udd database after creating it. I'm not sure why that wasn't a problem before...

To post a comment you must log in.
Revision history for this message
Daniel Holbach (dholbach) wrote :

Fantastic work!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== removed file 'overview/uploads/common/udd.py'
2--- overview/uploads/common/udd.py 2013-04-08 17:03:42 +0000
3+++ overview/uploads/common/udd.py 1970-01-01 00:00:00 +0000
4@@ -1,64 +0,0 @@
5-import subprocess
6-import urllib
7-import codecs
8-import time
9-import re
10-import os
11-
12-from django.conf import settings
13-
14-UPDATE_INTERVAL = 60 * 60 * 24
15-URL = "http://udd.debian.org/udd.sql.gz"
16-LOCAL_FILE = os.path.join(settings.TEMP_PATH, "udd.sql.gz")
17-UNZIPPED_FILE = os.path.join(settings.TEMP_PATH, "udd.sql")
18-UPDATED_FILE = os.path.join(settings.TEMP_PATH, "new-udd.sql")
19-
20-def needs_update(data_file):
21- return (not os.path.exists(data_file)) or \
22- (time.time()-os.path.getctime(data_file) >= UPDATE_INTERVAL)
23-
24-def extract_good_parts():
25- new_script = []
26- stuff = [ "CREATE EXTENSION IF NOT EXISTS debversion WITH SCHEMA public;",
27- "CREATE TABLE ubuntu_upload_history (",
28- "COPY ubuntu_upload_history (",
29- "ALTER TABLE ONLY ubuntu_upload_history",
30- "REVOKE ALL ON TABLE ubuntu_upload_history FROM PUBLIC;",
31- "REVOKE ALL ON TABLE ubuntu_upload_history FROM udd;",
32- "GRANT ALL ON TABLE ubuntu_upload_history TO udd;",
33- "GRANT SELECT ON TABLE ubuntu_upload_history TO PUBLIC;",
34- ]
35- copy = False
36- with codecs.open(UNZIPPED_FILE, mode="r", encoding="utf-8", errors="replace") as f:
37- for line in f:
38- if filter(lambda a: line.startswith(a), stuff):
39- copy = True
40- if not line.strip():
41- copy = False
42- if copy:
43- new_line = re.sub("\\n", "\n", unicode(line.strip()))
44- new_script += [ new_line ]
45- new_script += [""]
46- if os.path.exists(UPDATED_FILE):
47- os.remove(UPDATED_FILE)
48- with codecs.open(UPDATED_FILE, mode="w", encoding="utf-8", errors="replace") as f:
49- f.write(u"\n".join(new_script))
50-
51-
52-def get_new_udd_data():
53- if needs_update(UNZIPPED_FILE):
54- try:
55- sock = urllib.urlopen(URL)
56- except:
57- return None
58- sock.close()
59- if os.path.exists(LOCAL_FILE):
60- os.remove(LOCAL_FILE)
61- urllib.urlretrieve(URL, LOCAL_FILE)
62- if os.path.exists(UNZIPPED_FILE):
63- os.remove(UNZIPPED_FILE)
64- subprocess.call(["gunzip", LOCAL_FILE])
65- extract_good_parts()
66- new_script = UPDATED_FILE
67- return new_script
68-
69
70=== modified file 'overview/uploads/management/commands/get-udd-data.py'
71--- overview/uploads/management/commands/get-udd-data.py 2013-04-09 02:45:57 +0000
72+++ overview/uploads/management/commands/get-udd-data.py 2013-06-11 01:26:32 +0000
73@@ -1,18 +1,33 @@
74 import subprocess
75 import sys, os
76+import urllib
77
78 from django.core.management.base import NoArgsCommand
79 from django.db import connection, transaction
80 from django.conf import settings
81 import psycopg2 as db
82
83-from uploads.common import udd
84+URL = "http://alioth.debian.org/~asb/udd/ubuntu_upload_history.sql"
85+LOCAL_FILE = os.path.join(settings.TEMP_PATH, "udd.sql")
86+UPDATED_FILE = os.path.join(settings.TEMP_PATH, "new-udd.sql")
87
88 class Command(NoArgsCommand):
89 help = "Update uploads data from UDD."
90
91+ def get_new_udd_data(self):
92+ try:
93+ sock = urllib.urlopen(URL)
94+ urllib.urlretrieve(URL, UPDATED_FILE)
95+ if os.path.exists(LOCAL_FILE):
96+ os.remove(LOCAL_FILE)
97+ os.rename(UPDATED_FILE, LOCAL_FILE)
98+ except:
99+ return None
100+ sock.close()
101+ return LOCAL_FILE
102+
103 def handle_noargs(self, **options):
104- new_script = udd.get_new_udd_data()
105+ new_script = self.get_new_udd_data()
106 if not new_script:
107 sys.exit(1)
108 dbinfo = settings.DATABASES['udd']
109@@ -24,16 +39,24 @@
110 cursor.execute("DROP DATABASE IF EXISTS " + dbinfo['NAME'])
111 cursor.execute("CREATE DATABASE " + dbinfo['NAME'] +
112 " WITH TEMPLATE=template0 ENCODING 'SQL_ASCII'")
113-# conn = db.connect(host=dbinfo['HOST'], dbname='udd',
114-# user=dbinfo['USER'], password=dbinfo['PASSWORD'],
115-# port=dbinfo['PORT'] or 5432)
116-# with conn.autocommit = True:
117-# cursor.execute(new_script)
118- dump = subprocess.Popen(["cat", new_script], stdout=subprocess.PIPE)
119+ conn.commit()
120+ cursor.close()
121+ conn.close()
122+
123+ conn = db.connect(host=dbinfo['HOST'], dbname=dbinfo['NAME'],
124+ user=dbinfo['USER'], password=dbinfo['PASSWORD'],
125+ port=dbinfo['PORT'] or 5432)
126+ cursor = conn.cursor()
127+ cursor.execute("CREATE EXTENSION IF NOT EXISTS debversion")
128+ conn.commit()
129+ cursor.close()
130+ conn.close()
131+
132 psql_env = os.environ.copy()
133 psql_env['PGPASSWORD'] = dbinfo['PASSWORD']
134- psql_cmd = ['psql', "-U", dbinfo['USER'], '-d', dbinfo['NAME']]
135- subprocess.call(psql_cmd, env=psql_env, stdin=dump.stdout,
136- stdout=subprocess.PIPE,
137- stderr=subprocess.PIPE)
138+ psql_cmd = ['psql',
139+ '-U', dbinfo['USER'],
140+ '-d', dbinfo['NAME'],
141+ '-f', new_script]
142+ subprocess.call(psql_cmd, env=psql_env)
143

Subscribers

People subscribed via source and target branches