Merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 into lp:linaro-fetch-image

Proposed by James Tunnicliffe
Status: Merged
Approved by: Milo Casagrande
Approved revision: 18
Merged at revision: 18
Proposed branch: lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10
Merge into: lp:linaro-fetch-image
Diff against target: 313 lines (+162/-87)
2 files modified
linaro-image-indexer (+110/-55)
linaro_fetch_image/fetch_image.py (+52/-32)
To merge this branch: bzr merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10
Reviewer: Milo Casagrande (community)
Review status: Approve
Review via email: mp+128909@code.launchpad.net

Description of the change

Updates indexing to work with the latest layout.
Adds Android information to the database, but doesn't build Android images yet.
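
The new snapshot rules in this branch parse each file name with a single regexp containing named groups, rather than one URL chunk per directory. A minimal standalone sketch of that technique, using the snapshot_binaries pattern and example path from the diff below:

    import re

    # Named groups map directly onto database columns.
    SNAPSHOT_BINARY = re.compile(
        r"linaro-"
        r"(?P<platform>\w+)-"
        r"(?P<image>[\w-]+?)[-_]+"
        r"(?P<date>\d+)\D"
        r"(?P<build>\d+)"
        r".*?\.tar\.gz")

    match = SNAPSHOT_BINARY.search(
        "quantal/images/nano/127/linaro-quantal-nano-20120916-127.tar.gz")
    if match:
        # {'platform': 'quantal', 'image': 'nano',
        #  'date': '20120916', 'build': '127'}
        columns = match.groupdict()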

17. By James Tunnicliffe

Re-enabled parts of the indexer that I had disabled while debugging.

18. By James Tunnicliffe

Index all Ubuntu releases.
Store the Ubuntu release name in a new upstream_release column (release tables only).
The new column is not used yet.
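
As a sketch only (not code from the branch), the upstream_release value comes from the release directory name via the ("upstream_release", "(\w+)-images") chunk added to the release_binaries rule:

    import re

    # Example directory layout from the diff: 12.09/ubuntu/precise-images/...
    url_chunks = "12.09/ubuntu/precise-images/ubuntu-desktop".split("/")
    match = re.search(r"(\w+)-images", url_chunks[2])
    upstream_release = match.group(1) if match else "unknown"
    # upstream_release == "precise"; the old-layout rules instead use the
    # fixed value "upstream_release=unknown".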

Revision history for this message
Milo Casagrande (milo) wrote:

Hi James,

changes look good to me.
+1

Note: there are some PEP8 warnings; should we fix them? If needed, I can take care of that as a maintenance task...

review: Approve

Preview Diff

=== modified file 'linaro-image-indexer'
--- linaro-image-indexer 2012-03-06 13:37:37 +0000
+++ linaro-image-indexer 2012-10-10 14:13:21 +0000
@@ -27,13 +27,17 @@
 import bz2
 import linaro_fetch_image.fetch_image

-RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www"
 RELEASE_URL = "http://releases.linaro.org/"
-OLD_RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www/platform"
 OLD_RELEASE_URL = "http://releases.linaro.org/platform/"
-SNAPSHOTS_WWW_DOCUMENT_ROOT = "/srv/snapshots.linaro.org/www/"
 SNAPSHOTS_URL = "http://snapshots.linaro.org/"

+BASE = "/srv"
+BASE = "/home/dooferlad/dev/fetch_image/layout/srv"
+RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, "releases.linaro.org/www")
+SNAPSHOTS_WWW_DOCUMENT_ROOT = os.path.join(BASE, "snapshots.linaro.org/www")
+OLD_RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE,
+ "releases.linaro.org/www/platform")
+
 class ServerIndexer():
 """Create a database of files on the linaro image servers for use by image
 creation tools."""
@@ -108,8 +112,7 @@
 relative_location, not_match):
 not_match_ok = False

- if( not (to_match_ok and not_match_ok)
- or not re.search("\.gz$", file)):
+ if(not (to_match_ok and not_match_ok)):
 continue # URL doesn't match the validator. Ignore.

 logging.getLogger("linaro_fetch_image").info(url)
@@ -174,63 +177,115 @@
 logger.addHandler(ch)

 #linaro-n/ubuntu-desktop/11.09
- crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,
- OLD_RELEASE_URL,
- ([], ["platform/", "old/", "hwpack",
- "alpha", "beta", "final", "leb",
- "leb", "release-candidate"]),
- ["platform", "image", "build=final"],
- "release_binaries",
- ["", "image", "platform"])
+ crawler.add_directory_parse_list(
+ OLD_RELEASES_WWW_DOCUMENT_ROOT,
+ OLD_RELEASE_URL,
+ (["\.gz$"],
+ ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb",
+ "release-candidate"]),
+ ["platform", "image", "build=final", "upstream_release=unknown"],
+ "release_binaries",
+ ["", "image", "platform"])

 #linaro-n/hwpacks/11.09
- crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,
- OLD_RELEASE_URL,
- (["/hwpacks/"],
- ["alpha", "beta", "final", "leb",
- "release-candidate"]),
- ["platform", "hardware", "build=final"],
- "release_hwpacks",
- ["", "", "platform",
- ("hardware", r"hwpack_linaro-(.*?)_")])
+ crawler.add_directory_parse_list(
+ OLD_RELEASES_WWW_DOCUMENT_ROOT,
+ OLD_RELEASE_URL,
+ (["/hwpacks/", "\.gz$"],
+ ["alpha", "beta", "final", "leb", "release-candidate"]),
+ ["platform", "hardware", "build=final", "upstream_release=unknown"],
+ "release_hwpacks",
+ ["", "", "platform",
+ ("hardware", r"hwpack_linaro-(.*?)_")])

 # 11.10/ubuntu/oneiric-images/ubuntu-desktop/
 # NOT images/...
- crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
- RELEASE_URL,
- (["\d+\.\d+", "ubuntu", "oneiric-images"],
- ["latest/", "platform/", "old/",
- "hwpack", "^images/"]),
- ["platform", "image", "build=final"],
- "release_binaries",
- ["platform", "", "", "image"])
+ # 12.09/ubuntu/precise-images/ubuntu-desktop/
+ # linaro-precise-ubuntu-desktop-20120923-436.tar.gz
+ crawler.add_directory_parse_list(
+ RELEASES_WWW_DOCUMENT_ROOT,
+ RELEASE_URL,
+ (["\d+\.\d+", "ubuntu", "\w+-images", "\.gz$"],
+ ["latest/", "platform/", "old/", "hwpack", "^images/"]),
+ ["platform", "image", "build=final", "upstream_release"],
+ "release_binaries",
+ ["platform", "", ("upstream_release", "(\w+)-images"), "image"])

 # 11.10/ubuntu/oneiric-hwpacks/
- crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
- RELEASE_URL,
- (["\d+\.\d+", "ubuntu", "oneiric-hwpacks"],
- ["latest/", "platform/", "old/",
- "^images/"]),
- ["platform", "hardware", "build=final"],
- "release_hwpacks",
- ["platform", "", "",
- ("hardware", r"hwpack_linaro-(.*?)_")])
-
- #oneiric/linaro-o-alip/20111026/0/images/tar/
- crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,
- SNAPSHOTS_URL,
- (["^oneiric/"], ["/hwpack"]),
- ["platform", "image", "date", "build"],
- "snapshot_binaries",
- ["platform", "image", "date", "build"])
-
- #oneiric/lt-panda-oneiric/20111026/0/images/hwpack/
- crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,
- SNAPSHOTS_URL,
- (["^oneiric/", "/hwpack"], []),
- ["platform", "hardware", "date", "build"],
- "snapshot_hwpacks",
- ["platform", "hardware", "date", "build"])
+ # 12.09/ubuntu/precise-hwpacks
+ crawler.add_directory_parse_list(
+ RELEASES_WWW_DOCUMENT_ROOT,
+ RELEASE_URL,
+ (["\d+\.\d+", "ubuntu", "-hwpacks", "\.gz$"],
+ ["latest/", "platform/", "old/", "^images/"]),
+ ["platform", "hardware", "build=final", "upstream_release"],
+ "release_hwpacks",
+ ["platform", "", ("upstream_release", "(\w+)-hwpacks"),
+ ("hardware", r"hwpack_linaro-(.*?)_")])
+
+ # quantal/images/nano/127/linaro-quantal-nano-20120916-127.tar.gz
+ crawler.add_directory_parse_list(
+ SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir
+ SNAPSHOTS_URL, # base_url
+ # url_validator (match, don't match)
+ (["^quantal/", "\.gz$"], ["/hwpack", "pre-built"]),
+ ["platform", "image", "date", "build"], # db_columns
+ "snapshot_binaries", # table
+ ["linaro-"
+ "(?P<platform>\w+)-"
+ "(?P<image>[\w-]+?)[-_]+"
+ "(?P<date>\d+)\D"
+ "(?P<build>\d+)"
+ ".*?\.tar\.gz"]) # url_chunks
+
+ # quantal/hwpacks/lt-panda/215/hwpack_linaro-lt-panda_20120927-215_armhf_supported.tar.gz
+ crawler.add_directory_parse_list(
+ SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir
+ SNAPSHOTS_URL, # base_url
+ # url_validator (match, don't match)
+ (["^quantal/", "/hwpack", "\.gz$"], []),
+ ["platform=quantal", "hardware", "date", "build"], # db_columns
+ "snapshot_hwpacks", # table
+ ["hwpack_linaro-"
+ "(?P<hardware>.+?)_"
+ "(?P<date>.+?)-"
+ "(?P<build>\d+)_"
+ ".*\.tar\.gz"]) # url_chunks
+
+ # www/12.05/android/images/panda-ics-gcc47-tilt-tracking-blob/
+ # system.tar.bz2
+ crawler.add_directory_parse_list(
+ RELEASES_WWW_DOCUMENT_ROOT,
+ RELEASE_URL,
+ # url_validator (match, don't match)
+ (["^\d+\.\d+/", "android", "images", "system.tar.bz2$"], []),
+ ["release", "hardware"], # db_columns
+ "android_release", # table
+ ["release", "", "", "hardware"]) # url_chunks
+
+ # vexpress-rtsm-mp-jb-gcc47-armlt-stable-open/12/target/product/
+ # vexpress/system.tar.bz2
+ # NOT images/...
+ crawler.add_directory_parse_list(
+ RELEASES_WWW_DOCUMENT_ROOT,
+ RELEASE_URL,
+ # url_validator (match, don't match)
+ (["^/android/~linaro-android/", "system.tar.bz2$"], []),
+ ["build", "tag", "hardware"], # db_columns
+ "android_snapshots", # table
+ ["tag", "build", "", "", "hardware"]) # url_chunks
+
+ #linaro-n/android/11.04/panda/system.tar.bz2
+ crawler.add_directory_parse_list(
+ OLD_RELEASES_WWW_DOCUMENT_ROOT,
+ OLD_RELEASE_URL,
+ # url_validator (match, don't match)
+ (["system\.tar\.bz2$"],
+ ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb",
+ "release-candidate"]),
+ ["release", "hardware"], # db_columns
+ "android_release", # table
+ ["", "", "release", "hardware"]) # url_chunks

 crawler.crawl()
 crawler.clean_removed_urls_from_db()

=== modified file 'linaro_fetch_image/fetch_image.py'
--- linaro_fetch_image/fetch_image.py 2012-06-23 02:56:23 +0000
+++ linaro_fetch_image/fetch_image.py 2012-10-10 14:13:21 +0000
@@ -963,25 +963,31 @@
 # url_chunks now contains all parts of the url, split on /,
 # not including the base URL

+ # Create a dictionary of column_name, value pairs to insert
+ to_insert = {}
+ # While doing this, create a list of columns without any extra
+ # information used by fixed value columns
+ db_columns = []
+ for name in self.url_parse[index]["db_columns"]:
+ name_search = re.search("(\w+)=(.*)", name)
+ if name_search:
+ to_insert[name_search.group(1)] = name_search.group(2)
+ db_columns.append(name_search.group(1))
+ else:
+ to_insert[name] = None
+ db_columns.append(name)
+
 # We now construct an SQL command to insert the index data into the
 # database using the information we have.
-
 sqlcmd = "INSERT INTO " + table + " ("
 length = 0
- for name in (self.url_parse[index]["url_chunks"] + ["url"]):
+ for name in (db_columns + ["url"]):
 if name != "":
 if isinstance(name, tuple):
 name = name[0]
 sqlcmd += name + ", "
 length += 1

- # Handle fixed value columns
- for name in self.url_parse[index]["db_columns"]:
- name_search = re.search("(\w+)=(.*)", name)
- if name_search:
- sqlcmd += name_search.group(1) + ", "
- length += 1
-
 sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma
 sqlcmd += ") VALUES ("

@@ -993,31 +999,45 @@
 # Get the parameters from the URL to record in the SQL database
 sqlparams = []
 chunk_index = 0
- for name in self.url_parse[index]["url_chunks"]:
- # If this part of the URL isn't a parameter, don't insert it
- if name != "":
- # If the entry is a tuple, it indicates it is of the form
- # name, regexp
- if isinstance(name, tuple):
- # use stored regexp to extract data for the database
- match = re.search(name[1], url_chunks[chunk_index])
- assert match, ("Unable to match regexp to string ",
- + url_chunks[chunk_index] + " " + name[1])
- sqlparams.append(match.group(1))
-
- else:
- sqlparams.append(url_chunks[chunk_index])
-
- chunk_index += 1
-
+
+ # Two options for parsing a URL.
+ # 1. Chunks (split up directories)
+ # 2. 1 regexp
+ # We parse as a regexp if there is only one URL chunk listed (we
+ # assume 1 chunk is unusual!)
+ if len(self.url_parse[index]["url_chunks"]) == 1:
+ url_search = re.search(self.url_parse[index]["url_chunks"][0],
+ url)
+ if not url_search:
+ print self.url_parse[index]["url_chunks"][0]
+ print url
+ print table
+ exit(1)
+ for column in db_columns:
+ if not to_insert[column]:
+ to_insert[column] = url_search.group(column)
+ else:
+ for name in self.url_parse[index]["url_chunks"]:
+ # If this part of the URL isn't a parameter, don't insert it
+ if name != "":
+ # If the entry is a tuple, it indicates it is of the form
+ # name, regexp
+ if isinstance(name, tuple):
+ # use stored regexp to extract data for the database
+ match = re.search(name[1], url_chunks[chunk_index])
+ assert match, ("Unable to match regexp to string ",
+ + url_chunks[chunk_index] + " " + name[1])
+ to_insert[name[0]] = match.group(1)
+ else:
+ to_insert[name] = url_chunks[chunk_index]
+
+ chunk_index += 1
+
+ # Put data into array for inserting into SQL command
+ for column in db_columns:
+ sqlparams.append(to_insert[column])
 sqlparams.append(url)

- # Handle fixed value columns
- for name in self.url_parse[index]["db_columns"]:
- name_search = re.search("(\w+)=(.*)", name)
- if name_search:
- sqlparams.append(name_search.group(2))
-
 logging.info("{0}: {1}".format(sqlcmd, sqlparams))
 self.c.execute(sqlcmd, tuple(sqlparams))

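
As a reading aid (not code from the branch), a condensed sketch of the parsing scheme fetch_image.py now follows: db_columns entries written as "name=value" become fixed values, and url_chunks is treated as a single named-group regexp when it contains exactly one entry, otherwise as one entry per directory:

    import re

    def parse_url_to_columns(db_columns_spec, url_chunks_spec, url, url_chunks):
        # Fixed-value columns such as "build=final" are filled in up front;
        # plain column names are filled in from the URL below.
        to_insert = {}
        for name in db_columns_spec:
            fixed = re.search(r"(\w+)=(.*)", name)
            if fixed:
                to_insert[fixed.group(1)] = fixed.group(2)
            else:
                to_insert[name] = None

        if len(url_chunks_spec) == 1:
            # One entry: a single regexp with named groups is matched
            # against the whole URL.
            match = re.search(url_chunks_spec[0], url)
            for column in to_insert:
                if to_insert[column] is None:
                    to_insert[column] = match.group(column)
        else:
            # Several entries: each one names the directory at the same
            # position; a (name, regexp) tuple extracts part of a chunk,
            # and "" skips a directory.
            for spec, chunk in zip(url_chunks_spec, url_chunks):
                if spec == "":
                    continue
                if isinstance(spec, tuple):
                    to_insert[spec[0]] = re.search(spec[1], chunk).group(1)
                else:
                    to_insert[spec] = chunk
        return to_insert

With the updated release_binaries rule, a path such as 12.09/ubuntu/precise-images/ubuntu-desktop/linaro-precise-ubuntu-desktop-20120923-436.tar.gz fills platform, upstream_release and image from the directories, while build keeps its fixed value "final".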
