Merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 into lp:linaro-fetch-image
- indexer-update-2012-10
- Merge into linaro-fetch-image
Proposed by
James Tunnicliffe
Status: | Merged |
---|---|
Approved by: | Milo Casagrande |
Approved revision: | 18 |
Merged at revision: | 18 |
Proposed branch: | lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 |
Merge into: | lp:linaro-fetch-image |
Diff against target: |
313 lines (+162/-87) 2 files modified
linaro-image-indexer (+110/-55) linaro_fetch_image/fetch_image.py (+52/-32) |
To merge this branch: | bzr merge lp:~dooferlad/linaro-fetch-image/indexer-update-2012-10 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Milo Casagrande (community) | Approve | ||
Review via email: mp+128909@code.launchpad.net |
Commit message
Description of the change
Updates indexing to work with latest layout.
Adds android information to database, but doesn't build images yet.
To post a comment you must log in.
- 17. By James Tunnicliffe
-
Re-enabled some bits of the indexer that I had disabled while debugging.
- 18. By James Tunnicliffe
-
Index all Ubuntu releases
Store Ubuntu release name in column upstream_release (release tables only)
New column not used yet.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'linaro-image-indexer' |
2 | --- linaro-image-indexer 2012-03-06 13:37:37 +0000 |
3 | +++ linaro-image-indexer 2012-10-10 14:13:21 +0000 |
4 | @@ -27,13 +27,17 @@ |
5 | import bz2 |
6 | import linaro_fetch_image.fetch_image |
7 | |
8 | -RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www" |
9 | RELEASE_URL = "http://releases.linaro.org/" |
10 | -OLD_RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www/platform" |
11 | OLD_RELEASE_URL = "http://releases.linaro.org/platform/" |
12 | -SNAPSHOTS_WWW_DOCUMENT_ROOT = "/srv/snapshots.linaro.org/www/" |
13 | SNAPSHOTS_URL = "http://snapshots.linaro.org/" |
14 | |
15 | +BASE = "/srv" |
16 | +BASE = "/home/dooferlad/dev/fetch_image/layout/srv" |
17 | +RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, "releases.linaro.org/www") |
18 | +SNAPSHOTS_WWW_DOCUMENT_ROOT = os.path.join(BASE, "snapshots.linaro.org/www") |
19 | +OLD_RELEASES_WWW_DOCUMENT_ROOT = os.path.join(BASE, |
20 | + "releases.linaro.org/www/platform") |
21 | + |
22 | class ServerIndexer(): |
23 | """Create a database of files on the linaro image servers for use by image |
24 | creation tools.""" |
25 | @@ -108,8 +112,7 @@ |
26 | relative_location, not_match): |
27 | not_match_ok = False |
28 | |
29 | - if( not (to_match_ok and not_match_ok) |
30 | - or not re.search("\.gz$", file)): |
31 | + if(not (to_match_ok and not_match_ok)): |
32 | continue # URL doesn't match the validator. Ignore. |
33 | |
34 | logging.getLogger("linaro_fetch_image").info(url) |
35 | @@ -174,63 +177,115 @@ |
36 | logger.addHandler(ch) |
37 | |
38 | #linaro-n/ubuntu-desktop/11.09 |
39 | - crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT, |
40 | - OLD_RELEASE_URL, |
41 | - ([], ["platform/", "old/", "hwpack", |
42 | - "alpha", "beta", "final", "leb", |
43 | - "leb", "release-candidate"]), |
44 | - ["platform", "image", "build=final"], |
45 | - "release_binaries", |
46 | - ["", "image", "platform"]) |
47 | + crawler.add_directory_parse_list( |
48 | + OLD_RELEASES_WWW_DOCUMENT_ROOT, |
49 | + OLD_RELEASE_URL, |
50 | + (["\.gz$"], |
51 | + ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb", |
52 | + "release-candidate"]), |
53 | + ["platform", "image", "build=final", "upstream_release=unknown"], |
54 | + "release_binaries", |
55 | + ["", "image", "platform"]) |
56 | |
57 | #linaro-n/hwpacks/11.09 |
58 | - crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT, |
59 | - OLD_RELEASE_URL, |
60 | - (["/hwpacks/"], |
61 | - ["alpha", "beta", "final", "leb", |
62 | - "release-candidate"]), |
63 | - ["platform", "hardware", "build=final"], |
64 | - "release_hwpacks", |
65 | - ["", "", "platform", |
66 | - ("hardware", r"hwpack_linaro-(.*?)_")]) |
67 | + crawler.add_directory_parse_list( |
68 | + OLD_RELEASES_WWW_DOCUMENT_ROOT, |
69 | + OLD_RELEASE_URL, |
70 | + (["/hwpacks/", "\.gz$"], |
71 | + ["alpha", "beta", "final", "leb", "release-candidate"]), |
72 | + ["platform", "hardware", "build=final", "upstream_release=unknown"], |
73 | + "release_hwpacks", |
74 | + ["", "", "platform", |
75 | + ("hardware", r"hwpack_linaro-(.*?)_")]) |
76 | |
77 | # 11.10/ubuntu/oneiric-images/ubuntu-desktop/ |
78 | # NOT images/... |
79 | - crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT, |
80 | - RELEASE_URL, |
81 | - (["\d+\.\d+", "ubuntu", "oneiric-images"], |
82 | - ["latest/", "platform/", "old/", |
83 | - "hwpack", "^images/"]), |
84 | - ["platform", "image", "build=final"], |
85 | - "release_binaries", |
86 | - ["platform", "", "", "image"]) |
87 | + # 12.09/ubuntu/precise-images/ubuntu-desktop/ |
88 | + # linaro-precise-ubuntu-desktop-20120923-436.tar.gz |
89 | + crawler.add_directory_parse_list( |
90 | + RELEASES_WWW_DOCUMENT_ROOT, |
91 | + RELEASE_URL, |
92 | + (["\d+\.\d+", "ubuntu", "\w+-images", "\.gz$"], |
93 | + ["latest/", "platform/", "old/", "hwpack", "^images/"]), |
94 | + ["platform", "image", "build=final", "upstream_release"], |
95 | + "release_binaries", |
96 | + ["platform", "", ("upstream_release", "(\w+)-images"), "image"]) |
97 | |
98 | # 11.10/ubuntu/oneiric-hwpacks/ |
99 | - crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT, |
100 | - RELEASE_URL, |
101 | - (["\d+\.\d+", "ubuntu", "oneiric-hwpacks"], |
102 | - ["latest/", "platform/", "old/", |
103 | - "^images/"]), |
104 | - ["platform", "hardware", "build=final"], |
105 | - "release_hwpacks", |
106 | - ["platform", "", "", |
107 | - ("hardware", r"hwpack_linaro-(.*?)_")]) |
108 | - |
109 | - #oneiric/linaro-o-alip/20111026/0/images/tar/ |
110 | - crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT, |
111 | - SNAPSHOTS_URL, |
112 | - (["^oneiric/"], ["/hwpack"]), |
113 | - ["platform", "image", "date", "build"], |
114 | - "snapshot_binaries", |
115 | - ["platform", "image", "date", "build"]) |
116 | - |
117 | - #oneiric/lt-panda-oneiric/20111026/0/images/hwpack/ |
118 | - crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT, |
119 | - SNAPSHOTS_URL, |
120 | - (["^oneiric/", "/hwpack"], []), |
121 | - ["platform", "hardware", "date", "build"], |
122 | - "snapshot_hwpacks", |
123 | - ["platform", "hardware", "date", "build"]) |
124 | + # 12.09/ubuntu/precise-hwpacks |
125 | + crawler.add_directory_parse_list( |
126 | + RELEASES_WWW_DOCUMENT_ROOT, |
127 | + RELEASE_URL, |
128 | + (["\d+\.\d+", "ubuntu", "-hwpacks", "\.gz$"], |
129 | + ["latest/", "platform/", "old/", "^images/"]), |
130 | + ["platform", "hardware", "build=final", "upstream_release"], |
131 | + "release_hwpacks", |
132 | + ["platform", "", ("upstream_release", "(\w+)-hwpacks"), |
133 | + ("hardware", r"hwpack_linaro-(.*?)_")]) |
134 | + |
135 | + # quantal/images/nano/127/linaro-quantal-nano-20120916-127.tar.gz |
136 | + crawler.add_directory_parse_list( |
137 | + SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir |
138 | + SNAPSHOTS_URL, # base_url |
139 | + # url_validator (match, don't match) |
140 | + (["^quantal/", "\.gz$"], ["/hwpack", "pre-built"]), |
141 | + ["platform", "image", "date", "build"], # db_columns |
142 | + "snapshot_binaries", # table |
143 | + ["linaro-" |
144 | + "(?P<platform>\w+)-" |
145 | + "(?P<image>[\w-]+?)[-_]+" |
146 | + "(?P<date>\d+)\D" |
147 | + "(?P<build>\d+)" |
148 | + ".*?\.tar\.gz"]) # url_chunks |
149 | + |
150 | + # quantal/hwpacks/lt-panda/215/hwpack_linaro-lt-panda_20120927-215_armhf_supported.tar.gz |
151 | + crawler.add_directory_parse_list( |
152 | + SNAPSHOTS_WWW_DOCUMENT_ROOT, # base_dir |
153 | + SNAPSHOTS_URL, # base_url |
154 | + # url_validator (match, don't match) |
155 | + (["^quantal/", "/hwpack", "\.gz$"], []), |
156 | + ["platform=quantal", "hardware", "date", "build"], # db_columns |
157 | + "snapshot_hwpacks", # table |
158 | + ["hwpack_linaro-" |
159 | + "(?P<hardware>.+?)_" |
160 | + "(?P<date>.+?)-" |
161 | + "(?P<build>\d+)_" |
162 | + ".*\.tar\.gz"]) # url_chunks |
163 | + |
164 | + # www/12.05/android/images/panda-ics-gcc47-tilt-tracking-blob/ |
165 | + # system.tar.bz2 |
166 | + crawler.add_directory_parse_list( |
167 | + RELEASES_WWW_DOCUMENT_ROOT, |
168 | + RELEASE_URL, |
169 | + # url_validator (match, don't match) |
170 | + (["^\d+\.\d+/", "android", "images", "system.tar.bz2$"], []), |
171 | + ["release", "hardware"], # db_columns |
172 | + "android_release", # table |
173 | + ["release", "", "", "hardware"]) # url_chunks |
174 | + |
175 | + # vexpress-rtsm-mp-jb-gcc47-armlt-stable-open/12/target/product/ |
176 | + # vexpress/system.tar.bz2 |
177 | + # NOT images/... |
178 | + crawler.add_directory_parse_list( |
179 | + RELEASES_WWW_DOCUMENT_ROOT, |
180 | + RELEASE_URL, |
181 | + # url_validator (match, don't match) |
182 | + (["^/android/~linaro-android/", "system.tar.bz2$"], []), |
183 | + ["build", "tag", "hardware"], # db_columns |
184 | + "android_snapshots", # table |
185 | + ["tag", "build", "", "", "hardware"]) # url_chunks |
186 | + |
187 | + #linaro-n/android/11.04/panda/system.tar.bz2 |
188 | + crawler.add_directory_parse_list( |
189 | + OLD_RELEASES_WWW_DOCUMENT_ROOT, |
190 | + OLD_RELEASE_URL, |
191 | + # url_validator (match, don't match) |
192 | + (["system\.tar\.bz2$"], |
193 | + ["platform/", "old/", "hwpack", "alpha", "beta", "final", "leb", |
194 | + "release-candidate"]), |
195 | + ["release", "hardware"], # db_columns |
196 | + "android_release", # table |
197 | + ["", "", "release", "hardware"]) # url_chunks |
198 | |
199 | crawler.crawl() |
200 | crawler.clean_removed_urls_from_db() |
201 | |
202 | === modified file 'linaro_fetch_image/fetch_image.py' |
203 | --- linaro_fetch_image/fetch_image.py 2012-06-23 02:56:23 +0000 |
204 | +++ linaro_fetch_image/fetch_image.py 2012-10-10 14:13:21 +0000 |
205 | @@ -963,25 +963,31 @@ |
206 | # url_chunks now contains all parts of the url, split on /, |
207 | # not including the base URL |
208 | |
209 | + # Create a dictionary of column_name, value pairs to insert |
210 | + to_insert = {} |
211 | + # While doing this, create a list of columns without any extra |
212 | + # information used by fixed value columns |
213 | + db_columns = [] |
214 | + for name in self.url_parse[index]["db_columns"]: |
215 | + name_search = re.search("(\w+)=(.*)", name) |
216 | + if name_search: |
217 | + to_insert[name_search.group(1)] = name_search.group(2) |
218 | + db_columns.append(name_search.group(1)) |
219 | + else: |
220 | + to_insert[name] = None |
221 | + db_columns.append(name) |
222 | + |
223 | # We now construct an SQL command to insert the index data into the |
224 | # database using the information we have. |
225 | - |
226 | sqlcmd = "INSERT INTO " + table + " (" |
227 | length = 0 |
228 | - for name in (self.url_parse[index]["url_chunks"] + ["url"]): |
229 | + for name in (db_columns + ["url"]): |
230 | if name != "": |
231 | if isinstance(name, tuple): |
232 | name = name[0] |
233 | sqlcmd += name + ", " |
234 | length += 1 |
235 | |
236 | - # Handle fixed value columns |
237 | - for name in self.url_parse[index]["db_columns"]: |
238 | - name_search = re.search("(\w+)=(.*)", name) |
239 | - if name_search: |
240 | - sqlcmd += name_search.group(1) + ", " |
241 | - length += 1 |
242 | - |
243 | sqlcmd = sqlcmd.rstrip(", ") # get rid of unwanted space & comma |
244 | sqlcmd += ") VALUES (" |
245 | |
246 | @@ -993,31 +999,45 @@ |
247 | # Get the parameters from the URL to record in the SQL database |
248 | sqlparams = [] |
249 | chunk_index = 0 |
250 | - for name in self.url_parse[index]["url_chunks"]: |
251 | - # If this part of the URL isn't a parameter, don't insert it |
252 | - if name != "": |
253 | - # If the entry is a tuple, it indicates it is of the form |
254 | - # name, regexp |
255 | - if isinstance(name, tuple): |
256 | - # use stored regexp to extract data for the database |
257 | - match = re.search(name[1], url_chunks[chunk_index]) |
258 | - assert match, ("Unable to match regexp to string ", |
259 | - + url_chunks[chunk_index] + " " + name[1]) |
260 | - sqlparams.append(match.group(1)) |
261 | - |
262 | - else: |
263 | - sqlparams.append(url_chunks[chunk_index]) |
264 | - |
265 | - chunk_index += 1 |
266 | - |
267 | + |
268 | + # Two options for parsing a URL. |
269 | + # 1. Chunks (split up directories) |
270 | + # 2. 1 regexp |
271 | + # We parse as a regexp if there is only one URL chunk listed (we |
272 | + # assume 1 chunk is unusual!) |
273 | + if len(self.url_parse[index]["url_chunks"]) == 1: |
274 | + url_search = re.search(self.url_parse[index]["url_chunks"][0], |
275 | + url) |
276 | + if not url_search: |
277 | + print self.url_parse[index]["url_chunks"][0] |
278 | + print url |
279 | + print table |
280 | + exit(1) |
281 | + for column in db_columns: |
282 | + if not to_insert[column]: |
283 | + to_insert[column] = url_search.group(column) |
284 | + else: |
285 | + for name in self.url_parse[index]["url_chunks"]: |
286 | + # If this part of the URL isn't a parameter, don't insert it |
287 | + if name != "": |
288 | + # If the entry is a tuple, it indicates it is of the form |
289 | + # name, regexp |
290 | + if isinstance(name, tuple): |
291 | + # use stored regexp to extract data for the database |
292 | + match = re.search(name[1], url_chunks[chunk_index]) |
293 | + assert match, ("Unable to match regexp to string ", |
294 | + + url_chunks[chunk_index] + " " + name[1]) |
295 | + to_insert[name[0]] = match.group(1) |
296 | + else: |
297 | + to_insert[name] = url_chunks[chunk_index] |
298 | + |
299 | + chunk_index += 1 |
300 | + |
301 | + # Put data into array for inserting into SQL command |
302 | + for column in db_columns: |
303 | + sqlparams.append(to_insert[column]) |
304 | sqlparams.append(url) |
305 | |
306 | - # Handle fixed value columns |
307 | - for name in self.url_parse[index]["db_columns"]: |
308 | - name_search = re.search("(\w+)=(.*)", name) |
309 | - if name_search: |
310 | - sqlparams.append(name_search.group(2)) |
311 | - |
312 | logging.info("{0}: {1}".format(sqlcmd, sqlparams)) |
313 | self.c.execute(sqlcmd, tuple(sqlparams)) |
314 |
Hi James,
changes look good to me.
+1
Note: there are some PEP8 warnings — should we fix them? I can take care of that as a maintenance task if needed.