Merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 into lp:linaro-fetch-image
- indexer-update-2012-10
- Merge into linaro-fetch-image
Proposed by
James Tunnicliffe
Status: | Merged |
---|---|
Approved by: | Milo Casagrande |
Approved revision: | 18 |
Merged at revision: | 18 |
Proposed branch: | lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 |
Merge into: | lp:linaro-fetch-image |
Diff against target: |
313 lines (+162/-87) 2 files modified
linaro-image-indexer (+110/-55) linaro_fetch_image/fetch_image.py (+52/-32) |
To merge this branch: | bzr merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Milo Casagrande (community) | Approve | ||
Review via email: mp+128909@code.launchpad.net |
Commit message
Description of the change
Updates indexing to work with latest layout.
Adds android information to database, but doesn't build images yet.
To post a comment you must log in.
- 17. By James Tunnicliffe
-
Fixed an issue where I had left some bits of the indexer disabled while debugging.
- 18. By James Tunnicliffe
-
Index all Ubuntu releases
Store Ubuntu release name in column upstream_release (release tables only)
New column not used yet.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'linaro-image-indexer' | |||
2 | --- linaro-image-indexer 2012-03-06 13:37:37 +0000 | |||
3 | +++ linaro-image-indexer 2012-10-10 14:13:21 +0000 | |||
4 | @@ -27,13 +27,17 @@ | |||
5 | 27 | import bz2 | 27 | import bz2 |
6 | 28 | import linaro_fetch_image.fetch_image | 28 | import linaro_fetch_image.fetch_image |
7 | 29 | 29 | ||
8 | 30 | RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www" | ||
9 | 31 | RELEASE_URL = "http://releases.linaro.org/" | 30 | RELEASE_URL = "http://releases.linaro.org/" |
10 | 32 | OLD_RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www/platform" | ||
11 | 33 | OLD_RELEASE_URL = "http://releases.linaro.org/platform/" | 31 | OLD_RELEASE_URL = "http://releases.linaro.org/platform/" |
12 | 34 | SNAPSHOTS_WWW_DOCUMENT_ROOT = "/srv/snapshots.linaro.org/www/" | ||
13 | 35 | SNAPSHOTS_URL = "http://snapshots.linaro.org/" | 32 | SNAPSHOTS_URL = "http://snapshots.linaro.org/" |
14 | 36 | 33 | ||
15 | 34 | BASE = "/srv" | ||
16 | 35 | BASE = "/home/dooferlad/dev/fetch_image/layout/srv" | ||
17 | 36 | RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, "releases.linaro.org/www") | ||
18 | 37 | SNAPSHOTS_WWW_DOCUMENT_ROOT = os.path.join(BASE, "snapshots.linaro.org/www") | ||
19 | 38 | OLD_RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, | ||
20 | 39 | "releases.linaro.org/www/platform") | ||
21 | 40 | |||
22 | 37 | class ServerIndexer(): | 41 | class ServerIndexer(): |
23 | 38 | """Create a database of files on the linaro image servers for use by image | 42 | """Create a database of files on the linaro image servers for use by image |
24 | 39 | creation tools.""" | 43 | creation tools.""" |
25 | @@ -108,8 +112,7 @@ | |||
26 | 108 | relative_location, not_match): | 112 | relative_location, not_match): |
27 | 109 | not_match_ok = False | 113 | not_match_ok = False |
28 | 110 | 114 | ||
31 | 111 | if( not (to_match_ok and not_match_ok) | 115 | if(not (to_match_ok and not_match_ok)): |
30 | 112 | or not re.search("\.gz$", file)): | ||
32 | 113 | continue # URL doesn't match the validator. Ignore. | 116 | continue # URL doesn't match the validator. Ignore. |
33 | 114 | 117 | ||
34 | 115 | logging.getLogger("linaro_fetch_image").info(url) | 118 | logging.getLogger("linaro_fetch_image").info(url) |
35 | @@ -174,63 +177,115 @@ | |||
36 | 174 | logger.addHandler(ch) | 177 | logger.addHandler(ch) |
37 | 175 | 178 | ||
38 | 176 | #linaro-n/ubuntu-desktop/11.09 | 179 | #linaro-n/ubuntu-desktop/11.09 |
47 | 177 | crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT, | 180 | crawler.add_directory_parse_list( |
48 | 178 | OLD_RELEASE_URL, | 181 | OLD_RELEASES_WWW_DOCUMENT_ROOT, |
49 | 179 | ([], ["platform/", "old/", "hwpack", | 182 | OLD_RELEASE_URL, |
50 | 180 | "alpha", "beta", "final", "leb", | 183 | (["\.gz$"], |
51 | 181 | "leb", "release-candidate"]), | 184 | ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb", |
52 | 182 | ["platform", "image", "build=final"], | 185 | "release-candidate"]), |
53 | 183 | "release_binaries", | 186 | ["platform", "image", "build=final", "upstream_release=unknown"], |
54 | 184 | ["", "image", "platform"]) | 187 | "release_binaries", |
55 | 188 | ["", "image", "platform"]) | ||
56 | 185 | 189 | ||
57 | 186 | #linaro-n/hwpacks/11.09 | 190 | #linaro-n/hwpacks/11.09 |
67 | 187 | crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT, | 191 | crawler.add_directory_parse_list( |
68 | 188 | OLD_RELEASE_URL, | 192 | OLD_RELEASES_WWW_DOCUMENT_ROOT, |
69 | 189 | (["/hwpacks/"], | 193 | OLD_RELEASE_URL, |
70 | 190 | ["alpha", "beta", "final", "leb", | 194 | (["/hwpacks/", "\.gz$"], |
71 | 191 | "release-candidate"]), | 195 | ["alpha", "beta", "final", "leb", "release-candidate"]), |
72 | 192 | ["platform", "hardware", "build=final"], | 196 | ["platform", "hardware", "build=final", "upstream_release=unknown"], |
73 | 193 | "release_hwpacks", | 197 | "release_hwpacks", |
74 | 194 | ["", "", "platform", | 198 | ["", "", "platform", |
75 | 195 | ("hardware", r"hwpack_linaro-(.*?)_")]) | 199 | ("hardware", r"hwpack_linaro-(.*?)_")]) |
76 | 196 | 200 | ||
77 | 197 | # 11.10/ubuntu/oneiric-images/ubuntu-desktop/ | 201 | # 11.10/ubuntu/oneiric-images/ubuntu-desktop/ |
78 | 198 | # NOT images/... | 202 | # NOT images/... |
87 | 199 | crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT, | 203 | # 12.09/ubuntu/precise-images/ubuntu-desktop/ |
88 | 200 | RELEASE_URL, | 204 | # linaro-precise-ubuntu-desktop-20120923-436.tar.gz |
89 | 201 | (["\d+\.\d+", "ubuntu", "oneiric-images"], | 205 | crawler.add_directory_parse_list( |
90 | 202 | ["latest/", "platform/", "old/", | 206 | RELEASES_WWW_DOCUMENT_ROOT, |
91 | 203 | "hwpack", "^images/"]), | 207 | RELEASE_URL, |
92 | 204 | ["platform", "image", "build=final"], | 208 | (["\d+\.\d+", "ubuntu", "\w+-images", "\.gz$"], |
93 | 205 | "release_binaries", | 209 | ["latest/", "platform/", "old/", "hwpack", "^images/"]), |
94 | 206 | ["platform", "", "", "image"]) | 210 | ["platform", "image", "build=final", "upstream_release"], |
95 | 211 | "release_binaries", | ||
96 | 212 | ["platform", "", ("upstream_release", "(\w+)-images"), "image"]) | ||
97 | 207 | 213 | ||
98 | 208 | # 11.10/ubuntu/oneiric-hwpacks/ | 214 | # 11.10/ubuntu/oneiric-hwpacks/ |
124 | 209 | crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT, | 215 | # 12.09/ubuntu/precise-hwpacks |
125 | 210 | RELEASE_URL, | 216 | crawler.add_directory_parse_list( |
126 | 211 | (["\d+\.\d+", "ubuntu", "oneiric-hwpacks"], | 217 | RELEASES_WWW_DOCUMENT_ROOT, |
127 | 212 | ["latest/", "platform/", "old/", | 218 | RELEASE_URL, |
128 | 213 | "^images/"]), | 219 | (["\d+\.\d+", "ubuntu", "-hwpacks", "\.gz$"], |
129 | 214 | ["platform", "hardware", "build=final"], | 220 | ["latest/", "platform/", "old/", "^images/"]), |
130 | 215 | "release_hwpacks", | 221 | ["platform", "hardware", "build=final", "upstream_release"], |
131 | 216 | ["platform", "", "", | 222 | "release_hwpacks", |
132 | 217 | ("hardware", r"hwpack_linaro-(.*?)_")]) | 223 | ["platform", "", ("upstream_release", "(\w+)-hwpacks"), |
133 | 218 | 224 | ("hardware", r"hwpack_linaro-(.*?)_")]) | |
134 | 219 | #oneiric/linaro-o-alip/20111026/0/images/tar/ | 225 | |
135 | 220 | crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT, | 226 | # quantal/images/nano/127/linaro-quantal-nano-20120916-127.tar.gz |
136 | 221 | SNAPSHOTS_URL, | 227 | crawler.add_directory_parse_list( |
137 | 222 | (["^oneiric/"], ["/hwpack"]), | 228 | SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir |
138 | 223 | ["platform", "image", "date", "build"], | 229 | SNAPSHOTS_URL, # base_url |
139 | 224 | "snapshot_binaries", | 230 | # url_validator (match, don't match) |
140 | 225 | ["platform", "image", "date", "build"]) | 231 | (["^quantal/", "\.gz$"], ["/hwpack", "pre-built"]), |
141 | 226 | 232 | ["platform", "image", "date", "build"], # db_columns | |
142 | 227 | #oneiric/lt-panda-oneiric/20111026/0/images/hwpack/ | 233 | "snapshot_binaries", # table |
143 | 228 | crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT, | 234 | ["linaro-" |
144 | 229 | SNAPSHOTS_URL, | 235 | "(?P<platform>\w+)-" |
145 | 230 | (["^oneiric/", "/hwpack"], []), | 236 | "(?P<image>[\w-]+?)[-_]+" |
146 | 231 | ["platform", "hardware", "date", "build"], | 237 | "(?P<date>\d+)\D" |
147 | 232 | "snapshot_hwpacks", | 238 | "(?P<build>\d+)" |
148 | 233 | ["platform", "hardware", "date", "build"]) | 239 | ".*?\.tar\.gz"]) # url_chunks |
149 | 240 | |||
150 | 241 | # quantal/hwpacks/lt-panda/215/hwpack_linaro-lt-panda_20120927-215_armhf_supported.tar.gz | ||
151 | 242 | crawler.add_directory_parse_list( | ||
152 | 243 | SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir | ||
153 | 244 | SNAPSHOTS_URL, # base_url | ||
154 | 245 | # url_validator (match, don't match) | ||
155 | 246 | (["^quantal/", "/hwpack", "\.gz$"], []), | ||
156 | 247 | ["platform=quantal", "hardware", "date", "build"], # db_columns | ||
157 | 248 | "snapshot_hwpacks", # table | ||
158 | 249 | ["hwpack_linaro-" | ||
159 | 250 | "(?P<hardware>.+?)_" | ||
160 | 251 | "(?P<date>.+?)-" | ||
161 | 252 | "(?P<build>\d+)_" | ||
162 | 253 | ".*\.tar\.gz"]) # url_chunks | ||
163 | 254 | |||
164 | 255 | # www/12.05/android/images/panda-ics-gcc47-tilt-tracking-blob/ | ||
165 | 256 | # system.tar.bz2 | ||
166 | 257 | crawler.add_directory_parse_list( | ||
167 | 258 | RELEASES_WWW_DOCUMENT_ROOT, | ||
168 | 259 | RELEASE_URL, | ||
169 | 260 | # url_validator (match, don't match) | ||
170 | 261 | (["^\d+\.\d+/", "android", "images", "system.tar.bz2$"], []), | ||
171 | 262 | ["release", "hardware"], # db_columns | ||
172 | 263 | "android_release", # table | ||
173 | 264 | ["release", "", "", "hardware"]) # url_chunks | ||
174 | 265 | |||
175 | 266 | # vexpress-rtsm-mp-jb-gcc47-armlt-stable-open/12/target/product/ | ||
176 | 267 | # vexpress/system.tar.bz2 | ||
177 | 268 | # NOT images/... | ||
178 | 269 | crawler.add_directory_parse_list( | ||
179 | 270 | RELEASES_WWW_DOCUMENT_ROOT, | ||
180 | 271 | RELEASE_URL, | ||
181 | 272 | # url_validator (match, don't match) | ||
182 | 273 | (["^/android/~linaro-android/", "system.tar.bz2$"], []), | ||
183 | 274 | ["build", "tag", "hardware"], # db_columns | ||
184 | 275 | "android_snapshots", # table | ||
185 | 276 | ["tag", "build", "", "", "hardware"]) # url_chunks | ||
186 | 277 | |||
187 | 278 | #linaro-n/android/11.04/panda/system.tar.bz2 | ||
188 | 279 | crawler.add_directory_parse_list( | ||
189 | 280 | OLD_RELEASES_WWW_DOCUMENT_ROOT, | ||
190 | 281 | OLD_RELEASE_URL, | ||
191 | 282 | # url_validator (match, don't match) | ||
192 | 283 | (["system\.tar\.bz2$"], | ||
193 | 284 | ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb", | ||
194 | 285 | "release-candidate"]), | ||
195 | 286 | ["release", "hardware"], # db_columns | ||
196 | 287 | "android_release", # table | ||
197 | 288 | ["", "", "release", "hardware"]) # url_chunks | ||
198 | 234 | 289 | ||
199 | 235 | crawler.crawl() | 290 | crawler.crawl() |
200 | 236 | crawler.clean_removed_urls_from_db() | 291 | crawler.clean_removed_urls_from_db() |
201 | 237 | 292 | ||
202 | === modified file 'linaro_fetch_image/fetch_image.py' | |||
203 | --- linaro_fetch_image/fetch_image.py 2012-06-23 02:56:23 +0000 | |||
204 | +++ linaro_fetch_image/fetch_image.py 2012-10-10 14:13:21 +0000 | |||
205 | @@ -963,25 +963,31 @@ | |||
206 | 963 | # url_chunks now contains all parts of the url, split on /, | 963 | # url_chunks now contains all parts of the url, split on /, |
207 | 964 | # not including the base URL | 964 | # not including the base URL |
208 | 965 | 965 | ||
209 | 966 | # Create a dictionary of column_name, value pairs to insert | ||
210 | 967 | to_insert = {} | ||
211 | 968 | # While doing this, create a list of columns without any extra | ||
212 | 969 | # information used by fixed value columns | ||
213 | 970 | db_columns = [] | ||
214 | 971 | for name in self.url_parse[index]["db_columns"]: | ||
215 | 972 | name_search = re.search("(\w+)=(.*)", name) | ||
216 | 973 | if name_search: | ||
217 | 974 | to_insert[name_search.group(1)] = name_search.group(2) | ||
218 | 975 | db_columns.append(name_search.group(1)) | ||
219 | 976 | else: | ||
220 | 977 | to_insert[name] = None | ||
221 | 978 | db_columns.append(name) | ||
222 | 979 | |||
223 | 966 | # We now construct an SQL command to insert the index data into the | 980 | # We now construct an SQL command to insert the index data into the |
224 | 967 | # database using the information we have. | 981 | # database using the information we have. |
225 | 968 | |||
226 | 969 | sqlcmd = "INSERT INTO " + table + " (" | 982 | sqlcmd = "INSERT INTO " + table + " (" |
227 | 970 | length = 0 | 983 | length = 0 |
229 | 971 | for name in (self.url_parse[index]["url_chunks"] + ["url"]): | 984 | for name in (db_columns + ["url"]): |
230 | 972 | if name != "": | 985 | if name != "": |
231 | 973 | if isinstance(name, tuple): | 986 | if isinstance(name, tuple): |
232 | 974 | name = name[0] | 987 | name = name[0] |
233 | 975 | sqlcmd += name + ", " | 988 | sqlcmd += name + ", " |
234 | 976 | length += 1 | 989 | length += 1 |
235 | 977 | 990 | ||
236 | 978 | # Handle fixed value columns | ||
237 | 979 | for name in self.url_parse[index]["db_columns"]: | ||
238 | 980 | name_search = re.search("(\w+)=(.*)", name) | ||
239 | 981 | if name_search: | ||
240 | 982 | sqlcmd += name_search.group(1) + ", " | ||
241 | 983 | length += 1 | ||
242 | 984 | |||
243 | 985 | sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma | 991 | sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma |
244 | 986 | sqlcmd += ") VALUES (" | 992 | sqlcmd += ") VALUES (" |
245 | 987 | 993 | ||
246 | @@ -993,31 +999,45 @@ | |||
247 | 993 | # Get the parameters from the URL to record in the SQL database | 999 | # Get the parameters from the URL to record in the SQL database |
248 | 994 | sqlparams = [] | 1000 | sqlparams = [] |
249 | 995 | chunk_index = 0 | 1001 | chunk_index = 0 |
267 | 996 | for name in self.url_parse[index]["url_chunks"]: | 1002 | |
268 | 997 | # If this part of the URL isn't a parameter, don't insert it | 1003 | # Two options for parsing a URL. |
269 | 998 | if name != "": | 1004 | # 1. Chunks (split up directories) |
270 | 999 | # If the entry is a tuple, it indicates it is of the form | 1005 | # 2. 1 regexp |
271 | 1000 | # name, regexp | 1006 | # We parse as a regexp if there is only one URL chunk listed (we |
272 | 1001 | if isinstance(name, tuple): | 1007 | # assume 1 chunk is unusual!) |
273 | 1002 | # use stored regexp to extract data for the database | 1008 | if len(self.url_parse[index]["url_chunks"]) == 1: |
274 | 1003 | match = re.search(name[1], url_chunks[chunk_index]) | 1009 | url_search = re.search(self.url_parse[index]["url_chunks"][0], |
275 | 1004 | assert match, ("Unable to match regexp to string ", | 1010 | url) |
276 | 1005 | + url_chunks[chunk_index] + " " + name[1]) | 1011 | if not url_search: |
277 | 1006 | sqlparams.append(match.group(1)) | 1012 | print self.url_parse[index]["url_chunks"][0] |
278 | 1007 | 1013 | print url | |
279 | 1008 | else: | 1014 | print table |
280 | 1009 | sqlparams.append(url_chunks[chunk_index]) | 1015 | exit(1) |
281 | 1010 | 1016 | for column in db_columns: | |
282 | 1011 | chunk_index += 1 | 1017 | if not to_insert[column]: |
283 | 1012 | 1018 | to_insert[column] = url_search.group(column) | |
284 | 1019 | else: | ||
285 | 1020 | for name in self.url_parse[index]["url_chunks"]: | ||
286 | 1021 | # If this part of the URL isn't a parameter, don't insert it | ||
287 | 1022 | if name != "": | ||
288 | 1023 | # If the entry is a tuple, it indicates it is of the form | ||
289 | 1024 | # name, regexp | ||
290 | 1025 | if isinstance(name, tuple): | ||
291 | 1026 | # use stored regexp to extract data for the database | ||
292 | 1027 | match = re.search(name[1], url_chunks[chunk_index]) | ||
293 | 1028 | assert match, ("Unable to match regexp to string ", | ||
294 | 1029 | + url_chunks[chunk_index] + " " + name[1]) | ||
295 | 1030 | to_insert[name[0]] = match.group(1) | ||
296 | 1031 | else: | ||
297 | 1032 | to_insert[name] = url_chunks[chunk_index] | ||
298 | 1033 | |||
299 | 1034 | chunk_index += 1 | ||
300 | 1035 | |||
301 | 1036 | # Put data into array for inserting into SQL command | ||
302 | 1037 | for column in db_columns: | ||
303 | 1038 | sqlparams.append(to_insert[column]) | ||
304 | 1013 | sqlparams.append(url) | 1039 | sqlparams.append(url) |
305 | 1014 | 1040 | ||
306 | 1015 | # Handle fixed value columns | ||
307 | 1016 | for name in self.url_parse[index]["db_columns"]: | ||
308 | 1017 | name_search = re.search("(\w+)=(.*)", name) | ||
309 | 1018 | if name_search: | ||
310 | 1019 | sqlparams.append(name_search.group(2)) | ||
311 | 1020 | |||
312 | 1021 | logging.info("{0}: {1}".format(sqlcmd, sqlparams)) | 1041 | logging.info("{0}: {1}".format(sqlcmd, sqlparams)) |
313 | 1022 | self.c.execute(sqlcmd, tuple(sqlparams)) | 1042 | self.c.execute(sqlcmd, tuple(sqlparams)) |
314 | 1023 | 1043 |
Hi James,
changes look good to me.
+1
Note: there are some PEP8 warnings — should we fix them? If needed, I can take care of that as a maintenance task.