Merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 into lp:linaro-fetch-image

Proposed by James Tunnicliffe
Status: Merged
Approved by: Milo Casagrande
Approved revision: 18
Merged at revision: 18
Proposed branch: lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10
Merge into: lp:linaro-fetch-image
Diff against target: 313 lines (+162/-87)
2 files modified
linaro-image-indexer (+110/-55)
linaro_fetch_image/fetch_image.py (+52/-32)
To merge this branch: bzr merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10
Reviewer Review Type Date Requested Status
Milo Casagrande (community) Approve
Review via email: mp+128909@code.launchpad.net

Description of the change

Updates indexing to work with latest layout.
Adds Android information to the database, but doesn't build images yet.

To post a comment you must log in.
17. By James Tunnicliffe

Re-enabled some bits of the indexer that I had disabled while debugging.

18. By James Tunnicliffe

Index all Ubuntu releases
Store Ubuntu release name in column upstream_release (release tables only)
New column not used yet.

Revision history for this message
Milo Casagrande (milo) wrote :

Hi James,

changes look good to me.
+1

Note: there are some PEP8 warnings; should we fix them? I can do that as a maintenance task if needed.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'linaro-image-indexer'
--- linaro-image-indexer 2012-03-06 13:37:37 +0000
+++ linaro-image-indexer 2012-10-10 14:13:21 +0000
@@ -27,13 +27,17 @@
27import bz227import bz2
28import linaro_fetch_image.fetch_image28import linaro_fetch_image.fetch_image
2929
30RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www"
31RELEASE_URL = "http://releases.linaro.org/"30RELEASE_URL = "http://releases.linaro.org/"
32OLD_RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www/platform"
33OLD_RELEASE_URL = "http://releases.linaro.org/platform/"31OLD_RELEASE_URL = "http://releases.linaro.org/platform/"
34SNAPSHOTS_WWW_DOCUMENT_ROOT = "/srv/snapshots.linaro.org/www/"
35SNAPSHOTS_URL = "http://snapshots.linaro.org/"32SNAPSHOTS_URL = "http://snapshots.linaro.org/"
3633
34BASE = "/srv"
35BASE = "/home/dooferlad/dev/fetch_image/layout/srv"
36RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, "releases.linaro.org/www")
37SNAPSHOTS_WWW_DOCUMENT_ROOT = os.path.join(BASE, "snapshots.linaro.org/www")
38OLD_RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE,
39 "releases.linaro.org/www/platform")
40
37class ServerIndexer():41class ServerIndexer():
38 """Create a database of files on the linaro image servers for use by image42 """Create a database of files on the linaro image servers for use by image
39 creation tools."""43 creation tools."""
@@ -108,8 +112,7 @@
108 relative_location, not_match):112 relative_location, not_match):
109 not_match_ok = False113 not_match_ok = False
110114
111 if( not (to_match_ok and not_match_ok)115 if(not (to_match_ok and not_match_ok)):
112 or not re.search("\.gz$", file)):
113 continue # URL doesn't match the validator. Ignore.116 continue # URL doesn't match the validator. Ignore.
114117
115 logging.getLogger("linaro_fetch_image").info(url)118 logging.getLogger("linaro_fetch_image").info(url)
@@ -174,63 +177,115 @@
174 logger.addHandler(ch)177 logger.addHandler(ch)
175178
176 #linaro-n/ubuntu-desktop/11.09179 #linaro-n/ubuntu-desktop/11.09
177 crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,180 crawler.add_directory_parse_list(
178 OLD_RELEASE_URL,181 OLD_RELEASES_WWW_DOCUMENT_ROOT,
179 ([], ["platform/", "old/", "hwpack",182 OLD_RELEASE_URL,
180 "alpha", "beta", "final", "leb",183 (["\.gz$"],
181 "leb", "release-candidate"]),184 ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb",
182 ["platform", "image", "build=final"],185 "release-candidate"]),
183 "release_binaries",186 ["platform", "image", "build=final", "upstream_release=unknown"],
184 ["", "image", "platform"])187 "release_binaries",
188 ["", "image", "platform"])
185189
186 #linaro-n/hwpacks/11.09190 #linaro-n/hwpacks/11.09
187 crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,191 crawler.add_directory_parse_list(
188 OLD_RELEASE_URL,192 OLD_RELEASES_WWW_DOCUMENT_ROOT,
189 (["/hwpacks/"],193 OLD_RELEASE_URL,
190 ["alpha", "beta", "final", "leb",194 (["/hwpacks/", "\.gz$"],
191 "release-candidate"]),195 ["alpha", "beta", "final", "leb", "release-candidate"]),
192 ["platform", "hardware", "build=final"],196 ["platform", "hardware", "build=final", "upstream_release=unknown"],
193 "release_hwpacks",197 "release_hwpacks",
194 ["", "", "platform",198 ["", "", "platform",
195 ("hardware", r"hwpack_linaro-(.*?)_")])199 ("hardware", r"hwpack_linaro-(.*?)_")])
196200
197 # 11.10/ubuntu/oneiric-images/ubuntu-desktop/201 # 11.10/ubuntu/oneiric-images/ubuntu-desktop/
198 # NOT images/...202 # NOT images/...
199 crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,203 # 12.09/ubuntu/precise-images/ubuntu-desktop/
200 RELEASE_URL,204 # linaro-precise-ubuntu-desktop-20120923-436.tar.gz
201 (["\d+\.\d+", "ubuntu", "oneiric-images"],205 crawler.add_directory_parse_list(
202 ["latest/", "platform/", "old/",206 RELEASES_WWW_DOCUMENT_ROOT,
203 "hwpack", "^images/"]),207 RELEASE_URL,
204 ["platform", "image", "build=final"],208 (["\d+\.\d+", "ubuntu", "\w+-images", "\.gz$"],
205 "release_binaries",209 ["latest/", "platform/", "old/", "hwpack", "^images/"]),
206 ["platform", "", "", "image"])210 ["platform", "image", "build=final", "upstream_release"],
211 "release_binaries",
212 ["platform", "", ("upstream_release", "(\w+)-images"), "image"])
207213
208 # 11.10/ubuntu/oneiric-hwpacks/214 # 11.10/ubuntu/oneiric-hwpacks/
209 crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,215 # 12.09/ubuntu/precise-hwpacks
210 RELEASE_URL,216 crawler.add_directory_parse_list(
211 (["\d+\.\d+", "ubuntu", "oneiric-hwpacks"],217 RELEASES_WWW_DOCUMENT_ROOT,
212 ["latest/", "platform/", "old/",218 RELEASE_URL,
213 "^images/"]),219 (["\d+\.\d+", "ubuntu", "-hwpacks", "\.gz$"],
214 ["platform", "hardware", "build=final"],220 ["latest/", "platform/", "old/", "^images/"]),
215 "release_hwpacks",221 ["platform", "hardware", "build=final", "upstream_release"],
216 ["platform", "", "",222 "release_hwpacks",
217 ("hardware", r"hwpack_linaro-(.*?)_")])223 ["platform", "", ("upstream_release", "(\w+)-hwpacks"),
218224 ("hardware", r"hwpack_linaro-(.*?)_")])
219 #oneiric/linaro-o-alip/20111026/0/images/tar/225
220 crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,226 # quantal/images/nano/127/linaro-quantal-nano-20120916-127.tar.gz
221 SNAPSHOTS_URL,227 crawler.add_directory_parse_list(
222 (["^oneiric/"], ["/hwpack"]),228 SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir
223 ["platform", "image", "date", "build"],229 SNAPSHOTS_URL, # base_url
224 "snapshot_binaries",230 # url_validator (match, don't match)
225 ["platform", "image", "date", "build"])231 (["^quantal/", "\.gz$"], ["/hwpack", "pre-built"]),
226232 ["platform", "image", "date", "build"], # db_columns
227 #oneiric/lt-panda-oneiric/20111026/0/images/hwpack/233 "snapshot_binaries", # table
228 crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,234 ["linaro-"
229 SNAPSHOTS_URL,235 "(?P<platform>\w+)-"
230 (["^oneiric/", "/hwpack"], []),236 "(?P<image>[\w-]+?)[-_]+"
231 ["platform", "hardware", "date", "build"],237 "(?P<date>\d+)\D"
232 "snapshot_hwpacks",238 "(?P<build>\d+)"
233 ["platform", "hardware", "date", "build"])239 ".*?\.tar\.gz"]) # url_chunks
240
241 # quantal/hwpacks/lt-panda/215/hwpack_linaro-lt-panda_20120927-215_armhf_supported.tar.gz
242 crawler.add_directory_parse_list(
243 SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir
244 SNAPSHOTS_URL, # base_url
245 # url_validator (match, don't match)
246 (["^quantal/", "/hwpack", "\.gz$"], []),
247 ["platform=quantal", "hardware", "date", "build"], # db_columns
248 "snapshot_hwpacks", # table
249 ["hwpack_linaro-"
250 "(?P<hardware>.+?)_"
251 "(?P<date>.+?)-"
252 "(?P<build>\d+)_"
253 ".*\.tar\.gz"]) # url_chunks
254
255 # www/12.05/android/images/panda-ics-gcc47-tilt-tracking-blob/
256 # system.tar.bz2
257 crawler.add_directory_parse_list(
258 RELEASES_WWW_DOCUMENT_ROOT,
259 RELEASE_URL,
260 # url_validator (match, don't match)
261 (["^\d+\.\d+/", "android", "images", "system.tar.bz2$"], []),
262 ["release", "hardware"], # db_columns
263 "android_release", # table
264 ["release", "", "", "hardware"]) # url_chunks
265
266 # vexpress-rtsm-mp-jb-gcc47-armlt-stable-open/12/target/product/
267 # vexpress/system.tar.bz2
268 # NOT images/...
269 crawler.add_directory_parse_list(
270 RELEASES_WWW_DOCUMENT_ROOT,
271 RELEASE_URL,
272 # url_validator (match, don't match)
273 (["^/android/~linaro-android/", "system.tar.bz2$"], []),
274 ["build", "tag", "hardware"], # db_columns
275 "android_snapshots", # table
276 ["tag", "build", "", "", "hardware"]) # url_chunks
277
278 #linaro-n/android/11.04/panda/system.tar.bz2
279 crawler.add_directory_parse_list(
280 OLD_RELEASES_WWW_DOCUMENT_ROOT,
281 OLD_RELEASE_URL,
282 # url_validator (match, don't match)
283 (["system\.tar\.bz2$"],
284 ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb",
285 "release-candidate"]),
286 ["release", "hardware"], # db_columns
287 "android_release", # table
288 ["", "", "release", "hardware"]) # url_chunks
234289
235 crawler.crawl()290 crawler.crawl()
236 crawler.clean_removed_urls_from_db()291 crawler.clean_removed_urls_from_db()
237292
=== modified file 'linaro_fetch_image/fetch_image.py'
--- linaro_fetch_image/fetch_image.py 2012-06-23 02:56:23 +0000
+++ linaro_fetch_image/fetch_image.py 2012-10-10 14:13:21 +0000
@@ -963,25 +963,31 @@
963 # url_chunks now contains all parts of the url, split on /,963 # url_chunks now contains all parts of the url, split on /,
964 # not including the base URL964 # not including the base URL
965965
966 # Create a dictionary of column_name, value pairs to insert
967 to_insert = {}
968 # While doing this, create a list of columns without any extra
969 # information used by fixed value columns
970 db_columns = []
971 for name in self.url_parse[index]["db_columns"]:
972 name_search = re.search("(\w+)=(.*)", name)
973 if name_search:
974 to_insert[name_search.group(1)] = name_search.group(2)
975 db_columns.append(name_search.group(1))
976 else:
977 to_insert[name] = None
978 db_columns.append(name)
979
966 # We now construct an SQL command to insert the index data into the980 # We now construct an SQL command to insert the index data into the
967 # database using the information we have.981 # database using the information we have.
968
969 sqlcmd = "INSERT INTO " + table + " ("982 sqlcmd = "INSERT INTO " + table + " ("
970 length = 0983 length = 0
971 for name in (self.url_parse[index]["url_chunks"] + ["url"]):984 for name in (db_columns + ["url"]):
972 if name != "":985 if name != "":
973 if isinstance(name, tuple):986 if isinstance(name, tuple):
974 name = name[0]987 name = name[0]
975 sqlcmd += name + ", "988 sqlcmd += name + ", "
976 length += 1989 length += 1
977990
978 # Handle fixed value columns
979 for name in self.url_parse[index]["db_columns"]:
980 name_search = re.search("(\w+)=(.*)", name)
981 if name_search:
982 sqlcmd += name_search.group(1) + ", "
983 length += 1
984
985 sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma991 sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma
986 sqlcmd += ") VALUES ("992 sqlcmd += ") VALUES ("
987993
@@ -993,31 +999,45 @@
993 # Get the parameters from the URL to record in the SQL database999 # Get the parameters from the URL to record in the SQL database
994 sqlparams = []1000 sqlparams = []
995 chunk_index = 01001 chunk_index = 0
996 for name in self.url_parse[index]["url_chunks"]:1002
997 # If this part of the URL isn't a parameter, don't insert it1003 # Two options for parsing a URL.
998 if name != "":1004 # 1. Chunks (split up directories)
999 # If the entry is a tuple, it indicates it is of the form1005 # 2. 1 regexp
1000 # name, regexp1006 # We parse as a regexp if there is only one URL chunk listed (we
1001 if isinstance(name, tuple):1007 # assume 1 chunk is unusual!)
1002 # use stored regexp to extract data for the database1008 if len(self.url_parse[index]["url_chunks"]) == 1:
1003 match = re.search(name[1], url_chunks[chunk_index])1009 url_search = re.search(self.url_parse[index]["url_chunks"][0],
1004 assert match, ("Unable to match regexp to string ",1010 url)
1005 + url_chunks[chunk_index] + " " + name[1])1011 if not url_search:
1006 sqlparams.append(match.group(1))1012 print self.url_parse[index]["url_chunks"][0]
10071013 print url
1008 else:1014 print table
1009 sqlparams.append(url_chunks[chunk_index])1015 exit(1)
10101016 for column in db_columns:
1011 chunk_index += 11017 if not to_insert[column]:
10121018 to_insert[column] = url_search.group(column)
1019 else:
1020 for name in self.url_parse[index]["url_chunks"]:
1021 # If this part of the URL isn't a parameter, don't insert it
1022 if name != "":
1023 # If the entry is a tuple, it indicates it is of the form
1024 # name, regexp
1025 if isinstance(name, tuple):
1026 # use stored regexp to extract data for the database
1027 match = re.search(name[1], url_chunks[chunk_index])
1028 assert match, ("Unable to match regexp to string ",
1029 + url_chunks[chunk_index] + " " + name[1])
1030 to_insert[name[0]] = match.group(1)
1031 else:
1032 to_insert[name] = url_chunks[chunk_index]
1033
1034 chunk_index += 1
1035
1036 # Put data into array for inserting into SQL command
1037 for column in db_columns:
1038 sqlparams.append(to_insert[column])
1013 sqlparams.append(url)1039 sqlparams.append(url)
10141040
1015 # Handle fixed value columns
1016 for name in self.url_parse[index]["db_columns"]:
1017 name_search = re.search("(\w+)=(.*)", name)
1018 if name_search:
1019 sqlparams.append(name_search.group(2))
1020
1021 logging.info("{0}: {1}".format(sqlcmd, sqlparams))1041 logging.info("{0}: {1}".format(sqlcmd, sqlparams))
1022 self.c.execute(sqlcmd, tuple(sqlparams))1042 self.c.execute(sqlcmd, tuple(sqlparams))
10231043

Subscribers

People subscribed via source and target branches

to all changes: