Merge lp:~mars/launchpad/test-ghost-update into lp:~launchpad/launchpad/ghost-line
- test-ghost-update
- Merge into ghost-line
Proposed by
Māris Fogels
Status: | Merged |
---|---|
Approved by: | Māris Fogels |
Approved revision: | 11785 |
Merged at revision: | 11785 |
Proposed branch: | lp:~mars/launchpad/test-ghost-update |
Merge into: | lp:~launchpad/launchpad/ghost-line |
Diff against target: |
7192 lines (+3508/-2210) 24 files modified
lib/lp/buildmaster/doc/builder.txt (+118/-2) lib/lp/buildmaster/interfaces/builder.py (+62/-83) lib/lp/buildmaster/manager.py (+468/-204) lib/lp/buildmaster/model/builder.py (+224/-240) lib/lp/buildmaster/model/buildfarmjobbehavior.py (+52/-60) lib/lp/buildmaster/model/packagebuild.py (+0/-6) lib/lp/buildmaster/tests/mock_slaves.py (+32/-157) lib/lp/buildmaster/tests/test_builder.py (+154/-582) lib/lp/buildmaster/tests/test_manager.py (+782/-248) lib/lp/buildmaster/tests/test_packagebuild.py (+0/-12) lib/lp/code/model/recipebuilder.py (+28/-32) lib/lp/soyuz/browser/tests/test_builder_views.py (+1/-1) lib/lp/soyuz/doc/buildd-dispatching.txt (+371/-0) lib/lp/soyuz/doc/buildd-slavescanner.txt (+876/-0) lib/lp/soyuz/model/binarypackagebuildbehavior.py (+41/-59) lib/lp/soyuz/tests/test_binarypackagebuildbehavior.py (+8/-290) lib/lp/soyuz/tests/test_doc.py (+6/-0) lib/lp/testing/factory.py (+2/-8) lib/lp/translations/doc/translationtemplatesbuildbehavior.txt (+114/-0) lib/lp/translations/model/translationtemplatesbuildbehavior.py (+14/-20) lib/lp/translations/stories/buildfarm/xx-build-summary.txt (+1/-1) lib/lp/translations/tests/test_translationtemplatesbuildbehavior.py (+153/-202) lib/lp_sitecustomize.py (+0/-3) utilities/migrater/file-ownership.txt (+1/-0) |
To merge this branch: | bzr merge lp:~mars/launchpad/test-ghost-update |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Māris Fogels (community) | Approve | ||
Review via email: mp+42514@code.launchpad.net |
Commit message
Test merge for bundle-merge command
Description of the change
Test merge
To post a comment you must log in.
Revision history for this message
Māris Fogels (mars) : | # |
review:
Approve
Revision history for this message
Launchpad PQM Bot (launchpad-pqm) wrote : | # |
Revision history for this message
Māris Fogels (mars) : | # |
review:
Approve
Revision history for this message
Launchpad PQM Bot (launchpad-pqm) wrote : | # |
The attempt to merge lp:~mars/launchpad/test-ghost-update into lp:~launchpad/launchpad/ghost-line failed. Below is the output from the failed tests.
rm -f lib/canonical/
rm -f -r lazr-js/build
rm -f -r bin
rm -f -r parts
rm -f -r develop-eggs
rm -f .installed.cfg
rm -f -r build
rm -f _pythonpath.py
make -C sourcecode/
make: *** sourcecode/
make: *** [clean] Error 2
- 11785. By Māris Fogels
-
Added a file for testing
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'lib/lp/buildmaster/doc/builder.txt' | |||
2 | --- lib/lp/buildmaster/doc/builder.txt 2010-09-24 12:10:52 +0000 | |||
3 | +++ lib/lp/buildmaster/doc/builder.txt 2010-12-07 16:24:04 +0000 | |||
4 | @@ -19,6 +19,9 @@ | |||
5 | 19 | As expected, it implements IBuilder. | 19 | As expected, it implements IBuilder. |
6 | 20 | 20 | ||
7 | 21 | >>> from canonical.launchpad.webapp.testing import verifyObject | 21 | >>> from canonical.launchpad.webapp.testing import verifyObject |
8 | 22 | >>> from lp.buildmaster.interfaces.builder import IBuilder | ||
9 | 23 | >>> verifyObject(IBuilder, builder) | ||
10 | 24 | True | ||
11 | 22 | 25 | ||
12 | 23 | >>> print builder.name | 26 | >>> print builder.name |
13 | 24 | bob | 27 | bob |
14 | @@ -83,7 +86,7 @@ | |||
15 | 83 | The 'new' method will create a new builder in the database. | 86 | The 'new' method will create a new builder in the database. |
16 | 84 | 87 | ||
17 | 85 | >>> bnew = builderset.new(1, 'http://dummy.com:8221/', 'dummy', | 88 | >>> bnew = builderset.new(1, 'http://dummy.com:8221/', 'dummy', |
19 | 86 | ... 'Dummy Title', 'eh ?', 1) | 89 | ... 'Dummy Title', 'eh ?', 1) |
20 | 87 | >>> bnew.name | 90 | >>> bnew.name |
21 | 88 | u'dummy' | 91 | u'dummy' |
22 | 89 | 92 | ||
23 | @@ -167,7 +170,7 @@ | |||
24 | 167 | >>> recipe_bq.processor = i386_family.processors[0] | 170 | >>> recipe_bq.processor = i386_family.processors[0] |
25 | 168 | >>> recipe_bq.virtualized = True | 171 | >>> recipe_bq.virtualized = True |
26 | 169 | >>> transaction.commit() | 172 | >>> transaction.commit() |
28 | 170 | 173 | ||
29 | 171 | >>> queue_sizes = builderset.getBuildQueueSizes() | 174 | >>> queue_sizes = builderset.getBuildQueueSizes() |
30 | 172 | >>> print queue_sizes['virt']['386'] | 175 | >>> print queue_sizes['virt']['386'] |
31 | 173 | (1L, datetime.timedelta(0, 64)) | 176 | (1L, datetime.timedelta(0, 64)) |
32 | @@ -185,3 +188,116 @@ | |||
33 | 185 | 188 | ||
34 | 186 | >>> print queue_sizes['virt']['386'] | 189 | >>> print queue_sizes['virt']['386'] |
35 | 187 | (2L, datetime.timedelta(0, 128)) | 190 | (2L, datetime.timedelta(0, 128)) |
36 | 191 | |||
37 | 192 | |||
38 | 193 | Resuming buildd slaves | ||
39 | 194 | ====================== | ||
40 | 195 | |||
41 | 196 | Virtual slaves are resumed using a command specified in the | ||
42 | 197 | configuration profile. Production configuration uses a SSH trigger | ||
43 | 198 | account accessed via a private key available in the builddmaster | ||
44 | 199 | machine (which used ftpmaster configuration profile) as in: | ||
45 | 200 | |||
46 | 201 | {{{ | ||
47 | 202 | ssh ~/.ssh/ppa-reset-key ppa@%(vm_host)s | ||
48 | 203 | }}} | ||
49 | 204 | |||
50 | 205 | The test configuration uses a fake command that can be performed in | ||
51 | 206 | development machine and allow us to tests the important features used | ||
52 | 207 | in production, as 'vm_host' variable replacement. | ||
53 | 208 | |||
54 | 209 | >>> from canonical.config import config | ||
55 | 210 | >>> config.builddmaster.vm_resume_command | ||
56 | 211 | 'echo %(vm_host)s' | ||
57 | 212 | |||
58 | 213 | Before performing the command, it checks if the builder is indeed | ||
59 | 214 | virtual and raises CannotResumeHost if it isn't. | ||
60 | 215 | |||
61 | 216 | >>> bob = getUtility(IBuilderSet)['bob'] | ||
62 | 217 | >>> bob.resumeSlaveHost() | ||
63 | 218 | Traceback (most recent call last): | ||
64 | 219 | ... | ||
65 | 220 | CannotResumeHost: Builder is not virtualized. | ||
66 | 221 | |||
67 | 222 | For testing purposes resumeSlaveHost returns the stdout and stderr | ||
68 | 223 | buffer resulted from the command. | ||
69 | 224 | |||
70 | 225 | >>> frog = getUtility(IBuilderSet)['frog'] | ||
71 | 226 | >>> out, err = frog.resumeSlaveHost() | ||
72 | 227 | >>> print out.strip() | ||
73 | 228 | localhost-host.ppa | ||
74 | 229 | |||
75 | 230 | If the specified command fails, resumeSlaveHost also raises | ||
76 | 231 | CannotResumeHost exception with the results stdout and stderr. | ||
77 | 232 | |||
78 | 233 | # The command must have a vm_host dict key and when executed, | ||
79 | 234 | # have a returncode that is not 0. | ||
80 | 235 | >>> vm_resume_command = """ | ||
81 | 236 | ... [builddmaster] | ||
82 | 237 | ... vm_resume_command: test "%(vm_host)s = 'false'" | ||
83 | 238 | ... """ | ||
84 | 239 | >>> config.push('vm_resume_command', vm_resume_command) | ||
85 | 240 | >>> frog.resumeSlaveHost() | ||
86 | 241 | Traceback (most recent call last): | ||
87 | 242 | ... | ||
88 | 243 | CannotResumeHost: Resuming failed: | ||
89 | 244 | OUT: | ||
90 | 245 | <BLANKLINE> | ||
91 | 246 | ERR: | ||
92 | 247 | <BLANKLINE> | ||
93 | 248 | |||
94 | 249 | Restore default value for resume command. | ||
95 | 250 | |||
96 | 251 | >>> config_data = config.pop('vm_resume_command') | ||
97 | 252 | |||
98 | 253 | |||
99 | 254 | Rescuing lost slaves | ||
100 | 255 | ==================== | ||
101 | 256 | |||
102 | 257 | Builder.rescueIfLost() checks the build ID reported in the slave status | ||
103 | 258 | against the database. If it isn't building what we think it should be, | ||
104 | 259 | the current build will be aborted and the slave cleaned in preparation | ||
105 | 260 | for a new task. The decision about the slave's correctness is left up | ||
106 | 261 | to IBuildFarmJobBehavior.verifySlaveBuildCookie -- for these examples we | ||
107 | 262 | will use a special behavior that just checks if the cookie reads 'good'. | ||
108 | 263 | |||
109 | 264 | >>> import logging | ||
110 | 265 | >>> from lp.buildmaster.interfaces.builder import CorruptBuildCookie | ||
111 | 266 | >>> from lp.buildmaster.tests.mock_slaves import ( | ||
112 | 267 | ... BuildingSlave, MockBuilder, OkSlave, WaitingSlave) | ||
113 | 268 | |||
114 | 269 | >>> class TestBuildBehavior: | ||
115 | 270 | ... def verifySlaveBuildCookie(self, cookie): | ||
116 | 271 | ... if cookie != 'good': | ||
117 | 272 | ... raise CorruptBuildCookie('Bad value') | ||
118 | 273 | |||
119 | 274 | >>> def rescue_slave_if_lost(slave): | ||
120 | 275 | ... builder = MockBuilder('mock', slave, TestBuildBehavior()) | ||
121 | 276 | ... builder.rescueIfLost(logging.getLogger()) | ||
122 | 277 | |||
123 | 278 | An idle slave is not rescued. | ||
124 | 279 | |||
125 | 280 | >>> rescue_slave_if_lost(OkSlave()) | ||
126 | 281 | |||
127 | 282 | Slaves building or having built the correct build are not rescued | ||
128 | 283 | either. | ||
129 | 284 | |||
130 | 285 | >>> rescue_slave_if_lost(BuildingSlave(build_id='good')) | ||
131 | 286 | >>> rescue_slave_if_lost(WaitingSlave(build_id='good')) | ||
132 | 287 | |||
133 | 288 | But if a slave is building the wrong ID, it is declared lost and | ||
134 | 289 | an abort is attempted. MockSlave prints out a message when it is aborted | ||
135 | 290 | or cleaned. | ||
136 | 291 | |||
137 | 292 | >>> rescue_slave_if_lost(BuildingSlave(build_id='bad')) | ||
138 | 293 | Aborting slave | ||
139 | 294 | INFO:root:Builder 'mock' rescued from 'bad': 'Bad value' | ||
140 | 295 | |||
141 | 296 | Slaves having completed an incorrect build are also declared lost, | ||
142 | 297 | but there's no need to abort a completed build. Such builders are | ||
143 | 298 | instead simply cleaned, ready for the next build. | ||
144 | 299 | |||
145 | 300 | >>> rescue_slave_if_lost(WaitingSlave(build_id='bad')) | ||
146 | 301 | Cleaning slave | ||
147 | 302 | INFO:root:Builder 'mock' rescued from 'bad': 'Bad value' | ||
148 | 303 | |||
149 | 188 | 304 | ||
150 | === modified file 'lib/lp/buildmaster/interfaces/builder.py' | |||
151 | --- lib/lp/buildmaster/interfaces/builder.py 2010-10-18 11:57:09 +0000 | |||
152 | +++ lib/lp/buildmaster/interfaces/builder.py 2010-12-07 16:24:04 +0000 | |||
153 | @@ -154,6 +154,11 @@ | |||
154 | 154 | 154 | ||
155 | 155 | currentjob = Attribute("BuildQueue instance for job being processed.") | 155 | currentjob = Attribute("BuildQueue instance for job being processed.") |
156 | 156 | 156 | ||
157 | 157 | is_available = Bool( | ||
158 | 158 | title=_("Whether or not a builder is available for building " | ||
159 | 159 | "new jobs. "), | ||
160 | 160 | required=False) | ||
161 | 161 | |||
162 | 157 | failure_count = Int( | 162 | failure_count = Int( |
163 | 158 | title=_('Failure Count'), required=False, default=0, | 163 | title=_('Failure Count'), required=False, default=0, |
164 | 159 | description=_("Number of consecutive failures for this builder.")) | 164 | description=_("Number of consecutive failures for this builder.")) |
165 | @@ -168,74 +173,32 @@ | |||
166 | 168 | def resetFailureCount(): | 173 | def resetFailureCount(): |
167 | 169 | """Set the failure_count back to zero.""" | 174 | """Set the failure_count back to zero.""" |
168 | 170 | 175 | ||
209 | 171 | def failBuilder(reason): | 176 | def checkSlaveAlive(): |
210 | 172 | """Mark builder as failed for a given reason.""" | 177 | """Check that the buildd slave is alive. |
211 | 173 | 178 | ||
212 | 174 | def setSlaveForTesting(proxy): | 179 | This pings the slave over the network via the echo method and looks |
213 | 175 | """Sets the RPC proxy through which to operate the build slave.""" | 180 | for the sent message as the reply. |
214 | 176 | 181 | ||
215 | 177 | def verifySlaveBuildCookie(slave_build_id): | 182 | :raises BuildDaemonError: When the slave is down. |
176 | 178 | """Verify that a slave's build cookie is consistent. | ||
177 | 179 | |||
178 | 180 | This should delegate to the current `IBuildFarmJobBehavior`. | ||
179 | 181 | """ | ||
180 | 182 | |||
181 | 183 | def transferSlaveFileToLibrarian(file_sha1, filename, private): | ||
182 | 184 | """Transfer a file from the slave to the librarian. | ||
183 | 185 | |||
184 | 186 | :param file_sha1: The file's sha1, which is how the file is addressed | ||
185 | 187 | in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog' | ||
186 | 188 | will cause the build log to be retrieved and gzipped. | ||
187 | 189 | :param filename: The name of the file to be given to the librarian file | ||
188 | 190 | alias. | ||
189 | 191 | :param private: True if the build is for a private archive. | ||
190 | 192 | :return: A librarian file alias. | ||
191 | 193 | """ | ||
192 | 194 | |||
193 | 195 | def getBuildQueue(): | ||
194 | 196 | """Return a `BuildQueue` if there's an active job on this builder. | ||
195 | 197 | |||
196 | 198 | :return: A BuildQueue, or None. | ||
197 | 199 | """ | ||
198 | 200 | |||
199 | 201 | def getCurrentBuildFarmJob(): | ||
200 | 202 | """Return a `BuildFarmJob` for this builder.""" | ||
201 | 203 | |||
202 | 204 | # All methods below here return Deferred. | ||
203 | 205 | |||
204 | 206 | def isAvailable(): | ||
205 | 207 | """Whether or not a builder is available for building new jobs. | ||
206 | 208 | |||
207 | 209 | :return: A Deferred that fires with True or False, depending on | ||
208 | 210 | whether the builder is available or not. | ||
216 | 211 | """ | 183 | """ |
217 | 212 | 184 | ||
218 | 213 | def rescueIfLost(logger=None): | 185 | def rescueIfLost(logger=None): |
219 | 214 | """Reset the slave if its job information doesn't match the DB. | 186 | """Reset the slave if its job information doesn't match the DB. |
220 | 215 | 187 | ||
229 | 216 | This checks the build ID reported in the slave status against the | 188 | If the builder is BUILDING or WAITING but has a build ID string |
230 | 217 | database. If it isn't building what we think it should be, the current | 189 | that doesn't match what is stored in the DB, we have to dismiss |
231 | 218 | build will be aborted and the slave cleaned in preparation for a new | 190 | its current actions and clean the slave for another job, assuming |
232 | 219 | task. The decision about the slave's correctness is left up to | 191 | the XMLRPC is working properly at this point. |
225 | 220 | `IBuildFarmJobBehavior.verifySlaveBuildCookie`. | ||
226 | 221 | |||
227 | 222 | :return: A Deferred that fires when the dialog with the slave is | ||
228 | 223 | finished. It does not have a return value. | ||
233 | 224 | """ | 192 | """ |
234 | 225 | 193 | ||
235 | 226 | def updateStatus(logger=None): | 194 | def updateStatus(logger=None): |
241 | 227 | """Update the builder's status by probing it. | 195 | """Update the builder's status by probing it.""" |
237 | 228 | |||
238 | 229 | :return: A Deferred that fires when the dialog with the slave is | ||
239 | 230 | finished. It does not have a return value. | ||
240 | 231 | """ | ||
242 | 232 | 196 | ||
243 | 233 | def cleanSlave(): | 197 | def cleanSlave(): |
249 | 234 | """Clean any temporary files from the slave. | 198 | """Clean any temporary files from the slave.""" |
250 | 235 | 199 | ||
251 | 236 | :return: A Deferred that fires when the dialog with the slave is | 200 | def failBuilder(reason): |
252 | 237 | finished. It does not have a return value. | 201 | """Mark builder as failed for a given reason.""" |
248 | 238 | """ | ||
253 | 239 | 202 | ||
254 | 240 | def requestAbort(): | 203 | def requestAbort(): |
255 | 241 | """Ask that a build be aborted. | 204 | """Ask that a build be aborted. |
256 | @@ -243,9 +206,6 @@ | |||
257 | 243 | This takes place asynchronously: Actually killing everything running | 206 | This takes place asynchronously: Actually killing everything running |
258 | 244 | can take some time so the slave status should be queried again to | 207 | can take some time so the slave status should be queried again to |
259 | 245 | detect when the abort has taken effect. (Look for status ABORTED). | 208 | detect when the abort has taken effect. (Look for status ABORTED). |
260 | 246 | |||
261 | 247 | :return: A Deferred that fires when the dialog with the slave is | ||
262 | 248 | finished. It does not have a return value. | ||
263 | 249 | """ | 209 | """ |
264 | 250 | 210 | ||
265 | 251 | def resumeSlaveHost(): | 211 | def resumeSlaveHost(): |
266 | @@ -257,35 +217,37 @@ | |||
267 | 257 | :raises: CannotResumeHost: if builder is not virtual or if the | 217 | :raises: CannotResumeHost: if builder is not virtual or if the |
268 | 258 | configuration command has failed. | 218 | configuration command has failed. |
269 | 259 | 219 | ||
273 | 260 | :return: A Deferred that fires when the resume operation finishes, | 220 | :return: command stdout and stderr buffers as a tuple. |
271 | 261 | whose value is a (stdout, stderr) tuple for success, or a Failure | ||
272 | 262 | whose value is a CannotResumeHost exception. | ||
274 | 263 | """ | 221 | """ |
275 | 264 | 222 | ||
276 | 223 | def setSlaveForTesting(proxy): | ||
277 | 224 | """Sets the RPC proxy through which to operate the build slave.""" | ||
278 | 225 | |||
279 | 265 | def slaveStatus(): | 226 | def slaveStatus(): |
280 | 266 | """Get the slave status for this builder. | 227 | """Get the slave status for this builder. |
281 | 267 | 228 | ||
286 | 268 | :return: A Deferred which fires when the slave dialog is complete. | 229 | :return: a dict containing at least builder_status, but potentially |
287 | 269 | Its value is a dict containing at least builder_status, but | 230 | other values included by the current build behavior. |
284 | 270 | potentially other values included by the current build | ||
285 | 271 | behavior. | ||
288 | 272 | """ | 231 | """ |
289 | 273 | 232 | ||
290 | 274 | def slaveStatusSentence(): | 233 | def slaveStatusSentence(): |
291 | 275 | """Get the slave status sentence for this builder. | 234 | """Get the slave status sentence for this builder. |
292 | 276 | 235 | ||
297 | 277 | :return: A Deferred which fires when the slave dialog is complete. | 236 | :return: A tuple with the first element containing the slave status, |
298 | 278 | Its value is a tuple with the first element containing the | 237 | build_id-queue-id and then optionally more elements depending on |
299 | 279 | slave status, build_id-queue-id and then optionally more | 238 | the status. |
300 | 280 | elements depending on the status. | 239 | """ |
301 | 240 | |||
302 | 241 | def verifySlaveBuildCookie(slave_build_id): | ||
303 | 242 | """Verify that a slave's build cookie is consistent. | ||
304 | 243 | |||
305 | 244 | This should delegate to the current `IBuildFarmJobBehavior`. | ||
306 | 281 | """ | 245 | """ |
307 | 282 | 246 | ||
308 | 283 | def updateBuild(queueItem): | 247 | def updateBuild(queueItem): |
309 | 284 | """Verify the current build job status. | 248 | """Verify the current build job status. |
310 | 285 | 249 | ||
311 | 286 | Perform the required actions for each state. | 250 | Perform the required actions for each state. |
312 | 287 | |||
313 | 288 | :return: A Deferred that fires when the slave dialog is finished. | ||
314 | 289 | """ | 251 | """ |
315 | 290 | 252 | ||
316 | 291 | def startBuild(build_queue_item, logger): | 253 | def startBuild(build_queue_item, logger): |
317 | @@ -293,10 +255,21 @@ | |||
318 | 293 | 255 | ||
319 | 294 | :param build_queue_item: A BuildQueueItem to build. | 256 | :param build_queue_item: A BuildQueueItem to build. |
320 | 295 | :param logger: A logger to be used to log diagnostic information. | 257 | :param logger: A logger to be used to log diagnostic information. |
325 | 296 | 258 | :raises BuildSlaveFailure: When the build slave fails. | |
326 | 297 | :return: A Deferred that fires after the dispatch has completed whose | 259 | :raises CannotBuild: When a build cannot be started for some reason |
327 | 298 | value is None, or a Failure that contains an exception | 260 | other than the build slave failing. |
328 | 299 | explaining what went wrong. | 261 | """ |
329 | 262 | |||
330 | 263 | def transferSlaveFileToLibrarian(file_sha1, filename, private): | ||
331 | 264 | """Transfer a file from the slave to the librarian. | ||
332 | 265 | |||
333 | 266 | :param file_sha1: The file's sha1, which is how the file is addressed | ||
334 | 267 | in the slave XMLRPC protocol. Specially, the file_sha1 'buildlog' | ||
335 | 268 | will cause the build log to be retrieved and gzipped. | ||
336 | 269 | :param filename: The name of the file to be given to the librarian file | ||
337 | 270 | alias. | ||
338 | 271 | :param private: True if the build is for a private archive. | ||
339 | 272 | :return: A librarian file alias. | ||
340 | 300 | """ | 273 | """ |
341 | 301 | 274 | ||
342 | 302 | def handleTimeout(logger, error_message): | 275 | def handleTimeout(logger, error_message): |
343 | @@ -311,8 +284,6 @@ | |||
344 | 311 | 284 | ||
345 | 312 | :param logger: The logger object to be used for logging. | 285 | :param logger: The logger object to be used for logging. |
346 | 313 | :param error_message: The error message to be used for logging. | 286 | :param error_message: The error message to be used for logging. |
347 | 314 | :return: A Deferred that fires after the virtual slave was resumed | ||
348 | 315 | or immediately if it's a non-virtual slave. | ||
349 | 316 | """ | 287 | """ |
350 | 317 | 288 | ||
351 | 318 | def findAndStartJob(buildd_slave=None): | 289 | def findAndStartJob(buildd_slave=None): |
352 | @@ -320,9 +291,17 @@ | |||
353 | 320 | 291 | ||
354 | 321 | :param buildd_slave: An optional buildd slave that this builder should | 292 | :param buildd_slave: An optional buildd slave that this builder should |
355 | 322 | talk to. | 293 | talk to. |
359 | 323 | :return: A Deferred whose value is the `IBuildQueue` instance | 294 | :return: the `IBuildQueue` instance found or None if no job was found. |
360 | 324 | found or None if no job was found. | 295 | """ |
361 | 325 | """ | 296 | |
362 | 297 | def getBuildQueue(): | ||
363 | 298 | """Return a `BuildQueue` if there's an active job on this builder. | ||
364 | 299 | |||
365 | 300 | :return: A BuildQueue, or None. | ||
366 | 301 | """ | ||
367 | 302 | |||
368 | 303 | def getCurrentBuildFarmJob(): | ||
369 | 304 | """Return a `BuildFarmJob` for this builder.""" | ||
370 | 326 | 305 | ||
371 | 327 | 306 | ||
372 | 328 | class IBuilderSet(Interface): | 307 | class IBuilderSet(Interface): |
373 | 329 | 308 | ||
374 | === modified file 'lib/lp/buildmaster/manager.py' | |||
375 | --- lib/lp/buildmaster/manager.py 2010-10-20 12:28:46 +0000 | |||
376 | +++ lib/lp/buildmaster/manager.py 2010-12-07 16:24:04 +0000 | |||
377 | @@ -10,10 +10,13 @@ | |||
378 | 10 | 'BuilddManager', | 10 | 'BuilddManager', |
379 | 11 | 'BUILDD_MANAGER_LOG_NAME', | 11 | 'BUILDD_MANAGER_LOG_NAME', |
380 | 12 | 'FailDispatchResult', | 12 | 'FailDispatchResult', |
381 | 13 | 'RecordingSlave', | ||
382 | 13 | 'ResetDispatchResult', | 14 | 'ResetDispatchResult', |
383 | 15 | 'buildd_success_result_map', | ||
384 | 14 | ] | 16 | ] |
385 | 15 | 17 | ||
386 | 16 | import logging | 18 | import logging |
387 | 19 | import os | ||
388 | 17 | 20 | ||
389 | 18 | import transaction | 21 | import transaction |
390 | 19 | from twisted.application import service | 22 | from twisted.application import service |
391 | @@ -21,27 +24,129 @@ | |||
392 | 21 | defer, | 24 | defer, |
393 | 22 | reactor, | 25 | reactor, |
394 | 23 | ) | 26 | ) |
396 | 24 | from twisted.internet.task import LoopingCall | 27 | from twisted.protocols.policies import TimeoutMixin |
397 | 25 | from twisted.python import log | 28 | from twisted.python import log |
398 | 29 | from twisted.python.failure import Failure | ||
399 | 30 | from twisted.web import xmlrpc | ||
400 | 26 | from zope.component import getUtility | 31 | from zope.component import getUtility |
401 | 27 | 32 | ||
402 | 33 | from canonical.config import config | ||
403 | 34 | from canonical.launchpad.webapp import urlappend | ||
404 | 35 | from lp.services.database import write_transaction | ||
405 | 28 | from lp.buildmaster.enums import BuildStatus | 36 | from lp.buildmaster.enums import BuildStatus |
417 | 29 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( | 37 | from lp.services.twistedsupport.processmonitor import ProcessWithTimeout |
407 | 30 | BuildBehaviorMismatch, | ||
408 | 31 | ) | ||
409 | 32 | from lp.buildmaster.model.builder import Builder | ||
410 | 33 | from lp.buildmaster.interfaces.builder import ( | ||
411 | 34 | BuildDaemonError, | ||
412 | 35 | BuildSlaveFailure, | ||
413 | 36 | CannotBuild, | ||
414 | 37 | CannotFetchFile, | ||
415 | 38 | CannotResumeHost, | ||
416 | 39 | ) | ||
418 | 40 | 38 | ||
419 | 41 | 39 | ||
420 | 42 | BUILDD_MANAGER_LOG_NAME = "slave-scanner" | 40 | BUILDD_MANAGER_LOG_NAME = "slave-scanner" |
421 | 43 | 41 | ||
422 | 44 | 42 | ||
423 | 43 | buildd_success_result_map = { | ||
424 | 44 | 'ensurepresent': True, | ||
425 | 45 | 'build': 'BuilderStatus.BUILDING', | ||
426 | 46 | } | ||
427 | 47 | |||
428 | 48 | |||
429 | 49 | class QueryWithTimeoutProtocol(xmlrpc.QueryProtocol, TimeoutMixin): | ||
430 | 50 | """XMLRPC query protocol with a configurable timeout. | ||
431 | 51 | |||
432 | 52 | XMLRPC queries using this protocol will be unconditionally closed | ||
433 | 53 | when the timeout is elapsed. The timeout is fetched from the context | ||
434 | 54 | Launchpad configuration file (`config.builddmaster.socket_timeout`). | ||
435 | 55 | """ | ||
436 | 56 | def connectionMade(self): | ||
437 | 57 | xmlrpc.QueryProtocol.connectionMade(self) | ||
438 | 58 | self.setTimeout(config.builddmaster.socket_timeout) | ||
439 | 59 | |||
440 | 60 | |||
441 | 61 | class QueryFactoryWithTimeout(xmlrpc._QueryFactory): | ||
442 | 62 | """XMLRPC client factory with timeout support.""" | ||
443 | 63 | # Make this factory quiet. | ||
444 | 64 | noisy = False | ||
445 | 65 | # Use the protocol with timeout support. | ||
446 | 66 | protocol = QueryWithTimeoutProtocol | ||
447 | 67 | |||
448 | 68 | |||
449 | 69 | class RecordingSlave: | ||
450 | 70 | """An RPC proxy for buildd slaves that records instructions to the latter. | ||
451 | 71 | |||
452 | 72 | The idea here is to merely record the instructions that the slave-scanner | ||
453 | 73 | issues to the buildd slaves and "replay" them a bit later in asynchronous | ||
454 | 74 | and parallel fashion. | ||
455 | 75 | |||
456 | 76 | By dealing with a number of buildd slaves in parallel we remove *the* | ||
457 | 77 | major slave-scanner throughput issue while avoiding large-scale changes to | ||
458 | 78 | its code base. | ||
459 | 79 | """ | ||
460 | 80 | |||
461 | 81 | def __init__(self, name, url, vm_host): | ||
462 | 82 | self.name = name | ||
463 | 83 | self.url = url | ||
464 | 84 | self.vm_host = vm_host | ||
465 | 85 | |||
466 | 86 | self.resume_requested = False | ||
467 | 87 | self.calls = [] | ||
468 | 88 | |||
469 | 89 | def __repr__(self): | ||
470 | 90 | return '<%s:%s>' % (self.name, self.url) | ||
471 | 91 | |||
472 | 92 | def cacheFile(self, logger, libraryfilealias): | ||
473 | 93 | """Cache the file on the server.""" | ||
474 | 94 | self.ensurepresent( | ||
475 | 95 | libraryfilealias.content.sha1, libraryfilealias.http_url, '', '') | ||
476 | 96 | |||
477 | 97 | def sendFileToSlave(self, *args): | ||
478 | 98 | """Helper to send a file to this builder.""" | ||
479 | 99 | return self.ensurepresent(*args) | ||
480 | 100 | |||
481 | 101 | def ensurepresent(self, *args): | ||
482 | 102 | """Download files needed for the build.""" | ||
483 | 103 | self.calls.append(('ensurepresent', args)) | ||
484 | 104 | result = buildd_success_result_map.get('ensurepresent') | ||
485 | 105 | return [result, 'Download'] | ||
486 | 106 | |||
487 | 107 | def build(self, *args): | ||
488 | 108 | """Perform the build.""" | ||
489 | 109 | # XXX: This method does not appear to be used. | ||
490 | 110 | self.calls.append(('build', args)) | ||
491 | 111 | result = buildd_success_result_map.get('build') | ||
492 | 112 | return [result, args[0]] | ||
493 | 113 | |||
494 | 114 | def resume(self): | ||
495 | 115 | """Record the request to resume the builder.. | ||
496 | 116 | |||
497 | 117 | Always succeed. | ||
498 | 118 | |||
499 | 119 | :return: a (stdout, stderr, subprocess exitcode) triple | ||
500 | 120 | """ | ||
501 | 121 | self.resume_requested = True | ||
502 | 122 | return ['', '', 0] | ||
503 | 123 | |||
504 | 124 | def resumeSlave(self, clock=None): | ||
505 | 125 | """Resume the builder in a asynchronous fashion. | ||
506 | 126 | |||
507 | 127 | Used the configuration command-line in the same way | ||
508 | 128 | `BuilddSlave.resume` does. | ||
509 | 129 | |||
510 | 130 | Also use the builddmaster configuration 'socket_timeout' as | ||
511 | 131 | the process timeout. | ||
512 | 132 | |||
513 | 133 | :param clock: An optional twisted.internet.task.Clock to override | ||
514 | 134 | the default clock. For use in tests. | ||
515 | 135 | |||
516 | 136 | :return: a Deferred | ||
517 | 137 | """ | ||
518 | 138 | resume_command = config.builddmaster.vm_resume_command % { | ||
519 | 139 | 'vm_host': self.vm_host} | ||
520 | 140 | # Twisted API require string and the configuration provides unicode. | ||
521 | 141 | resume_argv = [str(term) for term in resume_command.split()] | ||
522 | 142 | |||
523 | 143 | d = defer.Deferred() | ||
524 | 144 | p = ProcessWithTimeout( | ||
525 | 145 | d, config.builddmaster.socket_timeout, clock=clock) | ||
526 | 146 | p.spawnProcess(resume_argv[0], tuple(resume_argv)) | ||
527 | 147 | return d | ||
528 | 148 | |||
529 | 149 | |||
530 | 45 | def get_builder(name): | 150 | def get_builder(name): |
531 | 46 | """Helper to return the builder given the slave for this request.""" | 151 | """Helper to return the builder given the slave for this request.""" |
532 | 47 | # Avoiding circular imports. | 152 | # Avoiding circular imports. |
533 | @@ -54,12 +159,9 @@ | |||
534 | 54 | # builder.currentjob hides a complicated query, don't run it twice. | 159 | # builder.currentjob hides a complicated query, don't run it twice. |
535 | 55 | # See bug 623281. | 160 | # See bug 623281. |
536 | 56 | current_job = builder.currentjob | 161 | current_job = builder.currentjob |
541 | 57 | if current_job is None: | 162 | build_job = current_job.specific_job.build |
538 | 58 | job_failure_count = 0 | ||
539 | 59 | else: | ||
540 | 60 | job_failure_count = current_job.specific_job.build.failure_count | ||
542 | 61 | 163 | ||
544 | 62 | if builder.failure_count == job_failure_count and current_job is not None: | 164 | if builder.failure_count == build_job.failure_count: |
545 | 63 | # If the failure count for the builder is the same as the | 165 | # If the failure count for the builder is the same as the |
546 | 64 | # failure count for the job being built, then we cannot | 166 | # failure count for the job being built, then we cannot |
547 | 65 | # tell whether the job or the builder is at fault. The best | 167 | # tell whether the job or the builder is at fault. The best |
548 | @@ -68,28 +170,17 @@ | |||
549 | 68 | current_job.reset() | 170 | current_job.reset() |
550 | 69 | return | 171 | return |
551 | 70 | 172 | ||
553 | 71 | if builder.failure_count > job_failure_count: | 173 | if builder.failure_count > build_job.failure_count: |
554 | 72 | # The builder has failed more than the jobs it's been | 174 | # The builder has failed more than the jobs it's been |
568 | 73 | # running. | 175 | # running, so let's disable it and re-schedule the build. |
569 | 74 | 176 | builder.failBuilder(fail_notes) | |
570 | 75 | # Re-schedule the build if there is one. | 177 | current_job.reset() |
558 | 76 | if current_job is not None: | ||
559 | 77 | current_job.reset() | ||
560 | 78 | |||
561 | 79 | # We are a little more tolerant with failing builders than | ||
562 | 80 | # failing jobs because sometimes they get unresponsive due to | ||
563 | 81 | # human error, flaky networks etc. We expect the builder to get | ||
564 | 82 | # better, whereas jobs are very unlikely to get better. | ||
565 | 83 | if builder.failure_count >= Builder.FAILURE_THRESHOLD: | ||
566 | 84 | # It's also gone over the threshold so let's disable it. | ||
567 | 85 | builder.failBuilder(fail_notes) | ||
571 | 86 | else: | 178 | else: |
572 | 87 | # The job is the culprit! Override its status to 'failed' | 179 | # The job is the culprit! Override its status to 'failed' |
573 | 88 | # to make sure it won't get automatically dispatched again, | 180 | # to make sure it won't get automatically dispatched again, |
574 | 89 | # and remove the buildqueue request. The failure should | 181 | # and remove the buildqueue request. The failure should |
575 | 90 | # have already caused any relevant slave data to be stored | 182 | # have already caused any relevant slave data to be stored |
576 | 91 | # on the build record so don't worry about that here. | 183 | # on the build record so don't worry about that here. |
577 | 92 | build_job = current_job.specific_job.build | ||
578 | 93 | build_job.status = BuildStatus.FAILEDTOBUILD | 184 | build_job.status = BuildStatus.FAILEDTOBUILD |
579 | 94 | builder.currentjob.destroySelf() | 185 | builder.currentjob.destroySelf() |
580 | 95 | 186 | ||
581 | @@ -99,108 +190,133 @@ | |||
582 | 99 | # next buildd scan. | 190 | # next buildd scan. |
583 | 100 | 191 | ||
584 | 101 | 192 | ||
585 | 193 | class BaseDispatchResult: | ||
586 | 194 | """Base class for *DispatchResult variations. | ||
587 | 195 | |||
588 | 196 | It will be extended to represent dispatching results and allow | ||
589 | 197 | homogeneous processing. | ||
590 | 198 | """ | ||
591 | 199 | |||
592 | 200 | def __init__(self, slave, info=None): | ||
593 | 201 | self.slave = slave | ||
594 | 202 | self.info = info | ||
595 | 203 | |||
596 | 204 | def _cleanJob(self, job): | ||
597 | 205 | """Clean up in case of builder reset or dispatch failure.""" | ||
598 | 206 | if job is not None: | ||
599 | 207 | job.reset() | ||
600 | 208 | |||
601 | 209 | def assessFailureCounts(self): | ||
602 | 210 | """View builder/job failure_count and work out which needs to die. | ||
603 | 211 | |||
604 | 212 | :return: True if we disabled something, False if we did not. | ||
605 | 213 | """ | ||
606 | 214 | builder = get_builder(self.slave.name) | ||
607 | 215 | assessFailureCounts(builder, self.info) | ||
608 | 216 | |||
609 | 217 | def ___call__(self): | ||
610 | 218 | raise NotImplementedError( | ||
611 | 219 | "Call sites must define an evaluation method.") | ||
612 | 220 | |||
613 | 221 | |||
614 | 222 | class FailDispatchResult(BaseDispatchResult): | ||
615 | 223 | """Represents a communication failure while dispatching a build job.. | ||
616 | 224 | |||
617 | 225 | When evaluated this object mark the corresponding `IBuilder` as | ||
618 | 226 | 'NOK' with the given text as 'failnotes'. It also cleans up the running | ||
619 | 227 | job (`IBuildQueue`). | ||
620 | 228 | """ | ||
621 | 229 | |||
622 | 230 | def __repr__(self): | ||
623 | 231 | return '%r failure (%s)' % (self.slave, self.info) | ||
624 | 232 | |||
625 | 233 | @write_transaction | ||
626 | 234 | def __call__(self): | ||
627 | 235 | self.assessFailureCounts() | ||
628 | 236 | |||
629 | 237 | |||
630 | 238 | class ResetDispatchResult(BaseDispatchResult): | ||
631 | 239 | """Represents a failure to reset a builder. | ||
632 | 240 | |||
633 | 241 | When evaluated this object simply cleans up the running job | ||
634 | 242 | (`IBuildQueue`) and marks the builder down. | ||
635 | 243 | """ | ||
636 | 244 | |||
637 | 245 | def __repr__(self): | ||
638 | 246 | return '%r reset failure' % self.slave | ||
639 | 247 | |||
640 | 248 | @write_transaction | ||
641 | 249 | def __call__(self): | ||
642 | 250 | builder = get_builder(self.slave.name) | ||
643 | 251 | # Builders that fail to reset should be disabled as per bug | ||
644 | 252 | # 563353. | ||
645 | 253 | # XXX Julian bug=586362 | ||
646 | 254 | # This is disabled until this code is not also used for dispatch | ||
647 | 255 | # failures where we *don't* want to disable the builder. | ||
648 | 256 | # builder.failBuilder(self.info) | ||
649 | 257 | self._cleanJob(builder.currentjob) | ||
650 | 258 | |||
651 | 259 | |||
652 | 102 | class SlaveScanner: | 260 | class SlaveScanner: |
653 | 103 | """A manager for a single builder.""" | 261 | """A manager for a single builder.""" |
654 | 104 | 262 | ||
655 | 105 | # The interval between each poll cycle, in seconds. We'd ideally | ||
656 | 106 | # like this to be lower but 5 seems a reasonable compromise between | ||
657 | 107 | # responsivity and load on the database server, since in each cycle | ||
658 | 108 | # we can run quite a few queries. | ||
659 | 109 | SCAN_INTERVAL = 5 | 263 | SCAN_INTERVAL = 5 |
660 | 110 | 264 | ||
661 | 265 | # These are for the benefit of tests; see `TestingSlaveScanner`. | ||
662 | 266 | # It pokes fake versions in here so that it can verify methods were | ||
663 | 267 | # called. The tests should really be using FakeMethod() though. | ||
664 | 268 | reset_result = ResetDispatchResult | ||
665 | 269 | fail_result = FailDispatchResult | ||
666 | 270 | |||
667 | 111 | def __init__(self, builder_name, logger): | 271 | def __init__(self, builder_name, logger): |
668 | 112 | self.builder_name = builder_name | 272 | self.builder_name = builder_name |
669 | 113 | self.logger = logger | 273 | self.logger = logger |
670 | 274 | self._deferred_list = [] | ||
671 | 275 | |||
672 | 276 | def scheduleNextScanCycle(self): | ||
673 | 277 | """Schedule another scan of the builder some time in the future.""" | ||
674 | 278 | self._deferred_list = [] | ||
675 | 279 | # XXX: Change this to use LoopingCall. | ||
676 | 280 | reactor.callLater(self.SCAN_INTERVAL, self.startCycle) | ||
677 | 114 | 281 | ||
678 | 115 | def startCycle(self): | 282 | def startCycle(self): |
679 | 116 | """Scan the builder and dispatch to it or deal with failures.""" | 283 | """Scan the builder and dispatch to it or deal with failures.""" |
680 | 117 | self.loop = LoopingCall(self.singleCycle) | ||
681 | 118 | self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL) | ||
682 | 119 | return self.stopping_deferred | ||
683 | 120 | |||
684 | 121 | def stopCycle(self): | ||
685 | 122 | """Terminate the LoopingCall.""" | ||
686 | 123 | self.loop.stop() | ||
687 | 124 | |||
688 | 125 | def singleCycle(self): | ||
689 | 126 | self.logger.debug("Scanning builder: %s" % self.builder_name) | 284 | self.logger.debug("Scanning builder: %s" % self.builder_name) |
714 | 127 | d = self.scan() | 285 | |
715 | 128 | 286 | try: | |
716 | 129 | d.addErrback(self._scanFailed) | 287 | slave = self.scan() |
717 | 130 | return d | 288 | if slave is None: |
718 | 131 | 289 | self.scheduleNextScanCycle() | |
719 | 132 | def _scanFailed(self, failure): | 290 | else: |
720 | 133 | """Deal with failures encountered during the scan cycle. | 291 | # XXX: Ought to return Deferred. |
721 | 134 | 292 | self.resumeAndDispatch(slave) | |
722 | 135 | 1. Print the error in the log | 293 | except: |
723 | 136 | 2. Increment and assess failure counts on the builder and job. | 294 | error = Failure() |
700 | 137 | """ | ||
701 | 138 | # Make sure that pending database updates are removed as it | ||
702 | 139 | # could leave the database in an inconsistent state (e.g. The | ||
703 | 140 | # job says it's running but the buildqueue has no builder set). | ||
704 | 141 | transaction.abort() | ||
705 | 142 | |||
706 | 143 | # If we don't recognise the exception include a stack trace with | ||
707 | 144 | # the error. | ||
708 | 145 | error_message = failure.getErrorMessage() | ||
709 | 146 | if failure.check( | ||
710 | 147 | BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch, | ||
711 | 148 | CannotResumeHost, BuildDaemonError, CannotFetchFile): | ||
712 | 149 | self.logger.info("Scanning failed with: %s" % error_message) | ||
713 | 150 | else: | ||
724 | 151 | self.logger.info("Scanning failed with: %s\n%s" % | 295 | self.logger.info("Scanning failed with: %s\n%s" % |
726 | 152 | (failure.getErrorMessage(), failure.getTraceback())) | 296 | (error.getErrorMessage(), error.getTraceback())) |
727 | 153 | 297 | ||
728 | 154 | # Decide if we need to terminate the job or fail the | ||
729 | 155 | # builder. | ||
730 | 156 | try: | ||
731 | 157 | builder = get_builder(self.builder_name) | 298 | builder = get_builder(self.builder_name) |
748 | 158 | builder.gotFailure() | 299 | |
749 | 159 | if builder.currentjob is not None: | 300 | # Decide if we need to terminate the job or fail the |
750 | 160 | build_farm_job = builder.getCurrentBuildFarmJob() | 301 | # builder. |
751 | 161 | build_farm_job.gotFailure() | 302 | self._incrementFailureCounts(builder) |
752 | 162 | self.logger.info( | 303 | self.logger.info( |
753 | 163 | "builder %s failure count: %s, " | 304 | "builder failure count: %s, job failure count: %s" % ( |
754 | 164 | "job '%s' failure count: %s" % ( | 305 | builder.failure_count, |
755 | 165 | self.builder_name, | 306 | builder.getCurrentBuildFarmJob().failure_count)) |
756 | 166 | builder.failure_count, | 307 | assessFailureCounts(builder, error.getErrorMessage()) |
741 | 167 | build_farm_job.title, | ||
742 | 168 | build_farm_job.failure_count)) | ||
743 | 169 | else: | ||
744 | 170 | self.logger.info( | ||
745 | 171 | "Builder %s failed a probe, count: %s" % ( | ||
746 | 172 | self.builder_name, builder.failure_count)) | ||
747 | 173 | assessFailureCounts(builder, failure.getErrorMessage()) | ||
757 | 174 | transaction.commit() | 308 | transaction.commit() |
765 | 175 | except: | 309 | |
766 | 176 | # Catastrophic code failure! Not much we can do. | 310 | self.scheduleNextScanCycle() |
767 | 177 | self.logger.error( | 311 | |
768 | 178 | "Miserable failure when trying to examine failure counts:\n", | 312 | @write_transaction |
762 | 179 | exc_info=True) | ||
763 | 180 | transaction.abort() | ||
764 | 181 | |||
769 | 182 | def scan(self): | 313 | def scan(self): |
770 | 183 | """Probe the builder and update/dispatch/collect as appropriate. | 314 | """Probe the builder and update/dispatch/collect as appropriate. |
771 | 184 | 315 | ||
791 | 185 | There are several steps to scanning: | 316 | The whole method is wrapped in a transaction, but we do partial |
792 | 186 | 317 | commits to avoid holding locks on tables. | |
793 | 187 | 1. If the builder is marked as "ok" then probe it to see what state | 318 | |
794 | 188 | it's in. This is where lost jobs are rescued if we think the | 319 | :return: A `RecordingSlave` if we dispatched a job to it, or None. |
776 | 189 | builder is doing something that it later tells us it's not, | ||
777 | 190 | and also where the multi-phase abort procedure happens. | ||
778 | 191 | See IBuilder.rescueIfLost, which is called by | ||
779 | 192 | IBuilder.updateStatus(). | ||
780 | 193 | 2. If the builder is still happy, we ask it if it has an active build | ||
781 | 194 | and then either update the build in Launchpad or collect the | ||
782 | 195 | completed build. (builder.updateBuild) | ||
783 | 196 | 3. If the builder is not happy or it was marked as unavailable | ||
784 | 197 | mid-build, we need to reset the job that we thought it had, so | ||
785 | 198 | that the job is dispatched elsewhere. | ||
786 | 199 | 4. If the builder is idle and we have another build ready, dispatch | ||
787 | 200 | it. | ||
788 | 201 | |||
789 | 202 | :return: A Deferred that fires when the scan is complete, whose | ||
790 | 203 | value is A `BuilderSlave` if we dispatched a job to it, or None. | ||
795 | 204 | """ | 320 | """ |
796 | 205 | # We need to re-fetch the builder object on each cycle as the | 321 | # We need to re-fetch the builder object on each cycle as the |
797 | 206 | # Storm store is invalidated over transaction boundaries. | 322 | # Storm store is invalidated over transaction boundaries. |
798 | @@ -208,72 +324,240 @@ | |||
799 | 208 | self.builder = get_builder(self.builder_name) | 324 | self.builder = get_builder(self.builder_name) |
800 | 209 | 325 | ||
801 | 210 | if self.builder.builderok: | 326 | if self.builder.builderok: |
803 | 211 | d = self.builder.updateStatus(self.logger) | 327 | self.builder.updateStatus(self.logger) |
804 | 328 | transaction.commit() | ||
805 | 329 | |||
806 | 330 | # See if we think there's an active build on the builder. | ||
807 | 331 | buildqueue = self.builder.getBuildQueue() | ||
808 | 332 | |||
809 | 333 | # XXX Julian 2010-07-29 bug=611258 | ||
810 | 334 | # We're not using the RecordingSlave until dispatching, which | ||
811 | 335 | # means that this part blocks until we've received a response | ||
812 | 336 | # from the builder. updateBuild() needs to be made | ||
813 | 337 | # asyncronous. | ||
814 | 338 | |||
815 | 339 | # Scan the slave and get the logtail, or collect the build if | ||
816 | 340 | # it's ready. Yes, "updateBuild" is a bad name. | ||
817 | 341 | if buildqueue is not None: | ||
818 | 342 | self.builder.updateBuild(buildqueue) | ||
819 | 343 | transaction.commit() | ||
820 | 344 | |||
821 | 345 | # If the builder is in manual mode, don't dispatch anything. | ||
822 | 346 | if self.builder.manual: | ||
823 | 347 | self.logger.debug( | ||
824 | 348 | '%s is in manual mode, not dispatching.' % self.builder.name) | ||
825 | 349 | return None | ||
826 | 350 | |||
827 | 351 | # If the builder is marked unavailable, don't dispatch anything. | ||
828 | 352 | # Additionaly, because builders can be removed from the pool at | ||
829 | 353 | # any time, we need to see if we think there was a build running | ||
830 | 354 | # on it before it was marked unavailable. In this case we reset | ||
831 | 355 | # the build thusly forcing it to get re-dispatched to another | ||
832 | 356 | # builder. | ||
833 | 357 | if not self.builder.is_available: | ||
834 | 358 | job = self.builder.currentjob | ||
835 | 359 | if job is not None and not self.builder.builderok: | ||
836 | 360 | self.logger.info( | ||
837 | 361 | "%s was made unavailable, resetting attached " | ||
838 | 362 | "job" % self.builder.name) | ||
839 | 363 | job.reset() | ||
840 | 364 | transaction.commit() | ||
841 | 365 | return None | ||
842 | 366 | |||
843 | 367 | # See if there is a job we can dispatch to the builder slave. | ||
844 | 368 | |||
845 | 369 | # XXX: Rather than use the slave actually associated with the builder | ||
846 | 370 | # (which, incidentally, shouldn't be a property anyway), we make a new | ||
847 | 371 | # RecordingSlave so we can get access to its asynchronous | ||
848 | 372 | # "resumeSlave" method. Blech. | ||
849 | 373 | slave = RecordingSlave( | ||
850 | 374 | self.builder.name, self.builder.url, self.builder.vm_host) | ||
851 | 375 | # XXX: Passing buildd_slave=slave overwrites the 'slave' property of | ||
852 | 376 | # self.builder. Not sure why this is needed yet. | ||
853 | 377 | self.builder.findAndStartJob(buildd_slave=slave) | ||
854 | 378 | if self.builder.currentjob is not None: | ||
855 | 379 | # After a successful dispatch we can reset the | ||
856 | 380 | # failure_count. | ||
857 | 381 | self.builder.resetFailureCount() | ||
858 | 382 | transaction.commit() | ||
859 | 383 | return slave | ||
860 | 384 | |||
861 | 385 | return None | ||
862 | 386 | |||
863 | 387 | def resumeAndDispatch(self, slave): | ||
864 | 388 | """Chain the resume and dispatching Deferreds.""" | ||
865 | 389 | # XXX: resumeAndDispatch makes Deferreds without returning them. | ||
866 | 390 | if slave.resume_requested: | ||
867 | 391 | # The slave needs to be reset before we can dispatch to | ||
868 | 392 | # it (e.g. a virtual slave) | ||
869 | 393 | |||
870 | 394 | # XXX: Two problems here. The first is that 'resumeSlave' only | ||
871 | 395 | # exists on RecordingSlave (BuilderSlave calls it 'resume'). | ||
872 | 396 | d = slave.resumeSlave() | ||
873 | 397 | d.addBoth(self.checkResume, slave) | ||
874 | 212 | else: | 398 | else: |
875 | 399 | # No resume required, build dispatching can commence. | ||
876 | 213 | d = defer.succeed(None) | 400 | d = defer.succeed(None) |
877 | 214 | 401 | ||
934 | 215 | def status_updated(ignored): | 402 | # Dispatch the build to the slave asynchronously. |
935 | 216 | # Commit the changes done while possibly rescuing jobs, to | 403 | d.addCallback(self.initiateDispatch, slave) |
936 | 217 | # avoid holding table locks. | 404 | # Store this deferred so we can wait for it along with all |
937 | 218 | transaction.commit() | 405 | # the others that will be generated by RecordingSlave during |
938 | 219 | 406 | # the dispatch process, and chain a callback after they've | |
939 | 220 | # See if we think there's an active build on the builder. | 407 | # all fired. |
940 | 221 | buildqueue = self.builder.getBuildQueue() | 408 | self._deferred_list.append(d) |
941 | 222 | 409 | ||
942 | 223 | # Scan the slave and get the logtail, or collect the build if | 410 | def initiateDispatch(self, resume_result, slave): |
943 | 224 | # it's ready. Yes, "updateBuild" is a bad name. | 411 | """Start dispatching a build to a slave. |
944 | 225 | if buildqueue is not None: | 412 | |
945 | 226 | return self.builder.updateBuild(buildqueue) | 413 | If the previous task in chain (slave resuming) has failed it will |
946 | 227 | 414 | receive a `ResetBuilderRequest` instance as 'resume_result' and | |
947 | 228 | def build_updated(ignored): | 415 | will immediately return that so the subsequent callback can collect |
948 | 229 | # Commit changes done while updating the build, to avoid | 416 | it. |
949 | 230 | # holding table locks. | 417 | |
950 | 231 | transaction.commit() | 418 | If the slave resuming succeeded, it starts the XMLRPC dialogue. The |
951 | 232 | 419 | dialogue may consist of many calls to the slave before the build | |
952 | 233 | # If the builder is in manual mode, don't dispatch anything. | 420 | starts. Each call is done via a Deferred event, where slave calls |
953 | 234 | if self.builder.manual: | 421 | are sent in callSlave(), and checked in checkDispatch() which will |
954 | 235 | self.logger.debug( | 422 | keep firing events via callSlave() until all the events are done or |
955 | 236 | '%s is in manual mode, not dispatching.' % | 423 | an error occurs. |
956 | 237 | self.builder.name) | 424 | """ |
957 | 238 | return | 425 | if resume_result is not None: |
958 | 239 | 426 | self.slaveConversationEnded() | |
959 | 240 | # If the builder is marked unavailable, don't dispatch anything. | 427 | return resume_result |
960 | 241 | # Additionaly, because builders can be removed from the pool at | 428 | |
961 | 242 | # any time, we need to see if we think there was a build running | 429 | self.logger.info('Dispatching: %s' % slave) |
962 | 243 | # on it before it was marked unavailable. In this case we reset | 430 | self.callSlave(slave) |
963 | 244 | # the build thusly forcing it to get re-dispatched to another | 431 | |
964 | 245 | # builder. | 432 | def _getProxyForSlave(self, slave): |
965 | 246 | 433 | """Return a twisted.web.xmlrpc.Proxy for the buildd slave. | |
966 | 247 | return self.builder.isAvailable().addCallback(got_available) | 434 | |
967 | 248 | 435 | Uses a protocol with timeout support, See QueryFactoryWithTimeout. | |
968 | 249 | def got_available(available): | 436 | """ |
969 | 250 | if not available: | 437 | proxy = xmlrpc.Proxy(str(urlappend(slave.url, 'rpc'))) |
970 | 251 | job = self.builder.currentjob | 438 | proxy.queryFactory = QueryFactoryWithTimeout |
971 | 252 | if job is not None and not self.builder.builderok: | 439 | return proxy |
972 | 253 | self.logger.info( | 440 | |
973 | 254 | "%s was made unavailable, resetting attached " | 441 | def callSlave(self, slave): |
974 | 255 | "job" % self.builder.name) | 442 | """Dispatch the next XMLRPC for the given slave.""" |
975 | 256 | job.reset() | 443 | if len(slave.calls) == 0: |
976 | 257 | transaction.commit() | 444 | # That's the end of the dialogue with the slave. |
977 | 258 | return | 445 | self.slaveConversationEnded() |
978 | 259 | 446 | return | |
979 | 260 | # See if there is a job we can dispatch to the builder slave. | 447 | |
980 | 261 | 448 | # Get an XMLRPC proxy for the buildd slave. | |
981 | 262 | d = self.builder.findAndStartJob() | 449 | proxy = self._getProxyForSlave(slave) |
982 | 263 | def job_started(candidate): | 450 | method, args = slave.calls.pop(0) |
983 | 264 | if self.builder.currentjob is not None: | 451 | d = proxy.callRemote(method, *args) |
984 | 265 | # After a successful dispatch we can reset the | 452 | d.addBoth(self.checkDispatch, method, slave) |
985 | 266 | # failure_count. | 453 | self._deferred_list.append(d) |
986 | 267 | self.builder.resetFailureCount() | 454 | self.logger.debug('%s -> %s(%s)' % (slave, method, args)) |
987 | 268 | transaction.commit() | 455 | |
988 | 269 | return self.builder.slave | 456 | def slaveConversationEnded(self): |
989 | 270 | else: | 457 | """After all the Deferreds are set up, chain a callback on them.""" |
990 | 458 | dl = defer.DeferredList(self._deferred_list, consumeErrors=True) | ||
991 | 459 | dl.addBoth(self.evaluateDispatchResult) | ||
992 | 460 | return dl | ||
993 | 461 | |||
994 | 462 | def evaluateDispatchResult(self, deferred_list_results): | ||
995 | 463 | """Process the DispatchResult for this dispatch chain. | ||
996 | 464 | |||
997 | 465 | After waiting for the Deferred chain to finish, we'll have a | ||
998 | 466 | DispatchResult to evaluate, which deals with the result of | ||
999 | 467 | dispatching. | ||
1000 | 468 | """ | ||
1001 | 469 | # The `deferred_list_results` is what we get when waiting on a | ||
1002 | 470 | # DeferredList. It's a list of tuples of (status, result) where | ||
1003 | 471 | # result is what the last callback in that chain returned. | ||
1004 | 472 | |||
1005 | 473 | # If the result is an instance of BaseDispatchResult we need to | ||
1006 | 474 | # evaluate it, as there's further action required at the end of | ||
1007 | 475 | # the dispatch chain. None, resulting from successful chains, | ||
1008 | 476 | # are discarded. | ||
1009 | 477 | |||
1010 | 478 | dispatch_results = [ | ||
1011 | 479 | result for status, result in deferred_list_results | ||
1012 | 480 | if isinstance(result, BaseDispatchResult)] | ||
1013 | 481 | |||
1014 | 482 | for result in dispatch_results: | ||
1015 | 483 | self.logger.info("%r" % result) | ||
1016 | 484 | result() | ||
1017 | 485 | |||
1018 | 486 | # At this point, we're done dispatching, so we can schedule the | ||
1019 | 487 | # next scan cycle. | ||
1020 | 488 | self.scheduleNextScanCycle() | ||
1021 | 489 | |||
1022 | 490 | # For the test suite so that it can chain callback results. | ||
1023 | 491 | return deferred_list_results | ||
1024 | 492 | |||
1025 | 493 | def checkResume(self, response, slave): | ||
1026 | 494 | """Check the result of resuming a slave. | ||
1027 | 495 | |||
1028 | 496 | If there's a problem resuming, we return a ResetDispatchResult which | ||
1029 | 497 | will get evaluated at the end of the scan, or None if the resume | ||
1030 | 498 | was OK. | ||
1031 | 499 | |||
1032 | 500 | :param response: the tuple that's constructed in | ||
1033 | 501 | ProcessWithTimeout.processEnded(), or a Failure that | ||
1034 | 502 | contains the tuple. | ||
1035 | 503 | :param slave: the slave object we're talking to | ||
1036 | 504 | """ | ||
1037 | 505 | if isinstance(response, Failure): | ||
1038 | 506 | out, err, code = response.value | ||
1039 | 507 | else: | ||
1040 | 508 | out, err, code = response | ||
1041 | 509 | if code == os.EX_OK: | ||
1042 | 510 | return None | ||
1043 | 511 | |||
1044 | 512 | error_text = '%s\n%s' % (out, err) | ||
1045 | 513 | self.logger.error('%s resume failure: %s' % (slave, error_text)) | ||
1046 | 514 | return self.reset_result(slave, error_text) | ||
1047 | 515 | |||
1048 | 516 | def _incrementFailureCounts(self, builder): | ||
1049 | 517 | builder.gotFailure() | ||
1050 | 518 | builder.getCurrentBuildFarmJob().gotFailure() | ||
1051 | 519 | |||
1052 | 520 | def checkDispatch(self, response, method, slave): | ||
1053 | 521 | """Verify the results of a slave xmlrpc call. | ||
1054 | 522 | |||
1055 | 523 | If it failed and it compromises the slave then return a corresponding | ||
1056 | 524 | `FailDispatchResult`, if it was a communication failure, simply | ||
1057 | 525 | reset the slave by returning a `ResetDispatchResult`. | ||
1058 | 526 | """ | ||
1059 | 527 | from lp.buildmaster.interfaces.builder import IBuilderSet | ||
1060 | 528 | builder = getUtility(IBuilderSet)[slave.name] | ||
1061 | 529 | |||
1062 | 530 | # XXX these DispatchResult classes are badly named and do the | ||
1063 | 531 | # same thing. We need to fix that. | ||
1064 | 532 | self.logger.debug( | ||
1065 | 533 | '%s response for "%s": %s' % (slave, method, response)) | ||
1066 | 534 | |||
1067 | 535 | if isinstance(response, Failure): | ||
1068 | 536 | self.logger.warn( | ||
1069 | 537 | '%s communication failed (%s)' % | ||
1070 | 538 | (slave, response.getErrorMessage())) | ||
1071 | 539 | self.slaveConversationEnded() | ||
1072 | 540 | self._incrementFailureCounts(builder) | ||
1073 | 541 | return self.fail_result(slave) | ||
1074 | 542 | |||
1075 | 543 | if isinstance(response, list) and len(response) == 2: | ||
1076 | 544 | if method in buildd_success_result_map: | ||
1077 | 545 | expected_status = buildd_success_result_map.get(method) | ||
1078 | 546 | status, info = response | ||
1079 | 547 | if status == expected_status: | ||
1080 | 548 | self.callSlave(slave) | ||
1081 | 271 | return None | 549 | return None |
1087 | 272 | return d.addCallback(job_started) | 550 | else: |
1088 | 273 | 551 | info = 'Unknown slave method: %s' % method | |
1089 | 274 | d.addCallback(status_updated) | 552 | else: |
1090 | 275 | d.addCallback(build_updated) | 553 | info = 'Unexpected response: %s' % repr(response) |
1091 | 276 | return d | 554 | |
1092 | 555 | self.logger.error( | ||
1093 | 556 | '%s failed to dispatch (%s)' % (slave, info)) | ||
1094 | 557 | |||
1095 | 558 | self.slaveConversationEnded() | ||
1096 | 559 | self._incrementFailureCounts(builder) | ||
1097 | 560 | return self.fail_result(slave, info) | ||
1098 | 277 | 561 | ||
1099 | 278 | 562 | ||
1100 | 279 | class NewBuildersScanner: | 563 | class NewBuildersScanner: |
1101 | @@ -294,21 +578,15 @@ | |||
1102 | 294 | self.current_builders = [ | 578 | self.current_builders = [ |
1103 | 295 | builder.name for builder in getUtility(IBuilderSet)] | 579 | builder.name for builder in getUtility(IBuilderSet)] |
1104 | 296 | 580 | ||
1105 | 297 | def stop(self): | ||
1106 | 298 | """Terminate the LoopingCall.""" | ||
1107 | 299 | self.loop.stop() | ||
1108 | 300 | |||
1109 | 301 | def scheduleScan(self): | 581 | def scheduleScan(self): |
1110 | 302 | """Schedule a callback SCAN_INTERVAL seconds later.""" | 582 | """Schedule a callback SCAN_INTERVAL seconds later.""" |
1115 | 303 | self.loop = LoopingCall(self.scan) | 583 | return self._clock.callLater(self.SCAN_INTERVAL, self.scan) |
1112 | 304 | self.loop.clock = self._clock | ||
1113 | 305 | self.stopping_deferred = self.loop.start(self.SCAN_INTERVAL) | ||
1114 | 306 | return self.stopping_deferred | ||
1116 | 307 | 584 | ||
1117 | 308 | def scan(self): | 585 | def scan(self): |
1118 | 309 | """If a new builder appears, create a SlaveScanner for it.""" | 586 | """If a new builder appears, create a SlaveScanner for it.""" |
1119 | 310 | new_builders = self.checkForNewBuilders() | 587 | new_builders = self.checkForNewBuilders() |
1120 | 311 | self.manager.addScanForBuilders(new_builders) | 588 | self.manager.addScanForBuilders(new_builders) |
1121 | 589 | self.scheduleScan() | ||
1122 | 312 | 590 | ||
1123 | 313 | def checkForNewBuilders(self): | 591 | def checkForNewBuilders(self): |
1124 | 314 | """See if any new builders were added.""" | 592 | """See if any new builders were added.""" |
1125 | @@ -331,7 +609,10 @@ | |||
1126 | 331 | manager=self, clock=clock) | 609 | manager=self, clock=clock) |
1127 | 332 | 610 | ||
1128 | 333 | def _setupLogger(self): | 611 | def _setupLogger(self): |
1130 | 334 | """Set up a 'slave-scanner' logger that redirects to twisted. | 612 | """Setup a 'slave-scanner' logger that redirects to twisted. |
1131 | 613 | |||
1132 | 614 | It is going to be used locally and within the thread running | ||
1133 | 615 | the scan() method. | ||
1134 | 335 | 616 | ||
1135 | 336 | Make it less verbose to avoid messing too much with the old code. | 617 | Make it less verbose to avoid messing too much with the old code. |
1136 | 337 | """ | 618 | """ |
1137 | @@ -362,29 +643,12 @@ | |||
1138 | 362 | # Events will now fire in the SlaveScanner objects to scan each | 643 | # Events will now fire in the SlaveScanner objects to scan each |
1139 | 363 | # builder. | 644 | # builder. |
1140 | 364 | 645 | ||
1141 | 365 | def stopService(self): | ||
1142 | 366 | """Callback for when we need to shut down.""" | ||
1143 | 367 | # XXX: lacks unit tests | ||
1144 | 368 | # All the SlaveScanner objects need to be halted gracefully. | ||
1145 | 369 | deferreds = [slave.stopping_deferred for slave in self.builder_slaves] | ||
1146 | 370 | deferreds.append(self.new_builders_scanner.stopping_deferred) | ||
1147 | 371 | |||
1148 | 372 | self.new_builders_scanner.stop() | ||
1149 | 373 | for slave in self.builder_slaves: | ||
1150 | 374 | slave.stopCycle() | ||
1151 | 375 | |||
1152 | 376 | # The 'stopping_deferred's are called back when the loops are | ||
1153 | 377 | # stopped, so we can wait on them all at once here before | ||
1154 | 378 | # exiting. | ||
1155 | 379 | d = defer.DeferredList(deferreds, consumeErrors=True) | ||
1156 | 380 | return d | ||
1157 | 381 | |||
1158 | 382 | def addScanForBuilders(self, builders): | 646 | def addScanForBuilders(self, builders): |
1159 | 383 | """Set up scanner objects for the builders specified.""" | 647 | """Set up scanner objects for the builders specified.""" |
1160 | 384 | for builder in builders: | 648 | for builder in builders: |
1161 | 385 | slave_scanner = SlaveScanner(builder, self.logger) | 649 | slave_scanner = SlaveScanner(builder, self.logger) |
1162 | 386 | self.builder_slaves.append(slave_scanner) | 650 | self.builder_slaves.append(slave_scanner) |
1164 | 387 | slave_scanner.startCycle() | 651 | slave_scanner.scheduleNextScanCycle() |
1165 | 388 | 652 | ||
1166 | 389 | # Return the slave list for the benefit of tests. | 653 | # Return the slave list for the benefit of tests. |
1167 | 390 | return self.builder_slaves | 654 | return self.builder_slaves |
1168 | 391 | 655 | ||
1169 | === modified file 'lib/lp/buildmaster/model/builder.py' | |||
1170 | --- lib/lp/buildmaster/model/builder.py 2010-10-20 11:54:27 +0000 | |||
1171 | +++ lib/lp/buildmaster/model/builder.py 2010-12-07 16:24:04 +0000 | |||
1172 | @@ -13,11 +13,12 @@ | |||
1173 | 13 | ] | 13 | ] |
1174 | 14 | 14 | ||
1175 | 15 | import gzip | 15 | import gzip |
1176 | 16 | import httplib | ||
1177 | 16 | import logging | 17 | import logging |
1178 | 17 | import os | 18 | import os |
1179 | 18 | import socket | 19 | import socket |
1180 | 20 | import subprocess | ||
1181 | 19 | import tempfile | 21 | import tempfile |
1182 | 20 | import transaction | ||
1183 | 21 | import urllib2 | 22 | import urllib2 |
1184 | 22 | import xmlrpclib | 23 | import xmlrpclib |
1185 | 23 | 24 | ||
1186 | @@ -33,13 +34,6 @@ | |||
1187 | 33 | Count, | 34 | Count, |
1188 | 34 | Sum, | 35 | Sum, |
1189 | 35 | ) | 36 | ) |
1190 | 36 | |||
1191 | 37 | from twisted.internet import ( | ||
1192 | 38 | defer, | ||
1193 | 39 | reactor as default_reactor, | ||
1194 | 40 | ) | ||
1195 | 41 | from twisted.web import xmlrpc | ||
1196 | 42 | |||
1197 | 43 | from zope.component import getUtility | 37 | from zope.component import getUtility |
1198 | 44 | from zope.interface import implements | 38 | from zope.interface import implements |
1199 | 45 | 39 | ||
1200 | @@ -64,6 +58,7 @@ | |||
1201 | 64 | from lp.buildmaster.interfaces.builder import ( | 58 | from lp.buildmaster.interfaces.builder import ( |
1202 | 65 | BuildDaemonError, | 59 | BuildDaemonError, |
1203 | 66 | BuildSlaveFailure, | 60 | BuildSlaveFailure, |
1204 | 61 | CannotBuild, | ||
1205 | 67 | CannotFetchFile, | 62 | CannotFetchFile, |
1206 | 68 | CannotResumeHost, | 63 | CannotResumeHost, |
1207 | 69 | CorruptBuildCookie, | 64 | CorruptBuildCookie, |
1208 | @@ -71,6 +66,9 @@ | |||
1209 | 71 | IBuilderSet, | 66 | IBuilderSet, |
1210 | 72 | ) | 67 | ) |
1211 | 73 | from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet | 68 | from lp.buildmaster.interfaces.buildfarmjob import IBuildFarmJobSet |
1212 | 69 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( | ||
1213 | 70 | BuildBehaviorMismatch, | ||
1214 | 71 | ) | ||
1215 | 74 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet | 72 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet |
1216 | 75 | from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior | 73 | from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior |
1217 | 76 | from lp.buildmaster.model.buildqueue import ( | 74 | from lp.buildmaster.model.buildqueue import ( |
1218 | @@ -80,9 +78,9 @@ | |||
1219 | 80 | from lp.registry.interfaces.person import validate_public_person | 78 | from lp.registry.interfaces.person import validate_public_person |
1220 | 81 | from lp.services.job.interfaces.job import JobStatus | 79 | from lp.services.job.interfaces.job import JobStatus |
1221 | 82 | from lp.services.job.model.job import Job | 80 | from lp.services.job.model.job import Job |
1222 | 81 | from lp.services.osutils import until_no_eintr | ||
1223 | 83 | from lp.services.propertycache import cachedproperty | 82 | from lp.services.propertycache import cachedproperty |
1226 | 84 | from lp.services.twistedsupport.processmonitor import ProcessWithTimeout | 83 | from lp.services.twistedsupport.xmlrpc import BlockingProxy |
1225 | 85 | from lp.services.twistedsupport import cancel_on_timeout | ||
1227 | 86 | # XXX Michael Nelson 2010-01-13 bug=491330 | 84 | # XXX Michael Nelson 2010-01-13 bug=491330 |
1228 | 87 | # These dependencies on soyuz will be removed when getBuildRecords() | 85 | # These dependencies on soyuz will be removed when getBuildRecords() |
1229 | 88 | # is moved. | 86 | # is moved. |
1230 | @@ -94,9 +92,25 @@ | |||
1231 | 94 | from lp.soyuz.model.processor import Processor | 92 | from lp.soyuz.model.processor import Processor |
1232 | 95 | 93 | ||
1233 | 96 | 94 | ||
1237 | 97 | class QuietQueryFactory(xmlrpc._QueryFactory): | 95 | class TimeoutHTTPConnection(httplib.HTTPConnection): |
1238 | 98 | """XMLRPC client factory that doesn't splatter the log with junk.""" | 96 | |
1239 | 99 | noisy = False | 97 | def connect(self): |
1240 | 98 | """Override the standard connect() methods to set a timeout""" | ||
1241 | 99 | ret = httplib.HTTPConnection.connect(self) | ||
1242 | 100 | self.sock.settimeout(config.builddmaster.socket_timeout) | ||
1243 | 101 | return ret | ||
1244 | 102 | |||
1245 | 103 | |||
1246 | 104 | class TimeoutHTTP(httplib.HTTP): | ||
1247 | 105 | _connection_class = TimeoutHTTPConnection | ||
1248 | 106 | |||
1249 | 107 | |||
1250 | 108 | class TimeoutTransport(xmlrpclib.Transport): | ||
1251 | 109 | """XMLRPC Transport to setup a socket with defined timeout""" | ||
1252 | 110 | |||
1253 | 111 | def make_connection(self, host): | ||
1254 | 112 | host, extra_headers, x509 = self.get_host_info(host) | ||
1255 | 113 | return TimeoutHTTP(host) | ||
1256 | 100 | 114 | ||
1257 | 101 | 115 | ||
1258 | 102 | class BuilderSlave(object): | 116 | class BuilderSlave(object): |
1259 | @@ -111,7 +125,24 @@ | |||
1260 | 111 | # many false positives in your test run and will most likely break | 125 | # many false positives in your test run and will most likely break |
1261 | 112 | # production. | 126 | # production. |
1262 | 113 | 127 | ||
1264 | 114 | def __init__(self, proxy, builder_url, vm_host, reactor=None): | 128 | # XXX: This (BuilderSlave) should use composition, rather than |
1265 | 129 | # inheritance. | ||
1266 | 130 | |||
1267 | 131 | # XXX: Have a documented interface for the XML-RPC server: | ||
1268 | 132 | # - what methods | ||
1269 | 133 | # - what return values expected | ||
1270 | 134 | # - what faults | ||
1271 | 135 | # (see XMLRPCBuildDSlave in lib/canonical/buildd/slave.py). | ||
1272 | 136 | |||
1273 | 137 | # XXX: Arguably, this interface should be asynchronous | ||
1274 | 138 | # (i.e. Deferred-returning). This would mean that Builder (see below) | ||
1275 | 139 | # would have to expect Deferreds. | ||
1276 | 140 | |||
1277 | 141 | # XXX: Once we have a client object with a defined, tested interface, we | ||
1278 | 142 | # should make a test double that doesn't do any XML-RPC and can be used to | ||
1279 | 143 | # make testing easier & tests faster. | ||
1280 | 144 | |||
1281 | 145 | def __init__(self, proxy, builder_url, vm_host): | ||
1282 | 115 | """Initialize a BuilderSlave. | 146 | """Initialize a BuilderSlave. |
1283 | 116 | 147 | ||
1284 | 117 | :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must | 148 | :param proxy: An XML-RPC proxy, implementing 'callRemote'. It must |
1285 | @@ -124,87 +155,63 @@ | |||
1286 | 124 | self._file_cache_url = urlappend(builder_url, 'filecache') | 155 | self._file_cache_url = urlappend(builder_url, 'filecache') |
1287 | 125 | self._server = proxy | 156 | self._server = proxy |
1288 | 126 | 157 | ||
1289 | 127 | if reactor is None: | ||
1290 | 128 | self.reactor = default_reactor | ||
1291 | 129 | else: | ||
1292 | 130 | self.reactor = reactor | ||
1293 | 131 | |||
1294 | 132 | @classmethod | 158 | @classmethod |
1315 | 133 | def makeBuilderSlave(cls, builder_url, vm_host, reactor=None, proxy=None): | 159 | def makeBlockingSlave(cls, builder_url, vm_host): |
1316 | 134 | """Create and return a `BuilderSlave`. | 160 | rpc_url = urlappend(builder_url, 'rpc') |
1317 | 135 | 161 | server_proxy = xmlrpclib.ServerProxy( | |
1318 | 136 | :param builder_url: The URL of the slave buildd machine, | 162 | rpc_url, transport=TimeoutTransport(), allow_none=True) |
1319 | 137 | e.g. http://localhost:8221 | 163 | return cls(BlockingProxy(server_proxy), builder_url, vm_host) |
1300 | 138 | :param vm_host: If the slave is virtual, specify its host machine here. | ||
1301 | 139 | :param reactor: Used by tests to override the Twisted reactor. | ||
1302 | 140 | :param proxy: Used By tests to override the xmlrpc.Proxy. | ||
1303 | 141 | """ | ||
1304 | 142 | rpc_url = urlappend(builder_url.encode('utf-8'), 'rpc') | ||
1305 | 143 | if proxy is None: | ||
1306 | 144 | server_proxy = xmlrpc.Proxy(rpc_url, allowNone=True) | ||
1307 | 145 | server_proxy.queryFactory = QuietQueryFactory | ||
1308 | 146 | else: | ||
1309 | 147 | server_proxy = proxy | ||
1310 | 148 | return cls(server_proxy, builder_url, vm_host, reactor) | ||
1311 | 149 | |||
1312 | 150 | def _with_timeout(self, d): | ||
1313 | 151 | TIMEOUT = config.builddmaster.socket_timeout | ||
1314 | 152 | return cancel_on_timeout(d, TIMEOUT, self.reactor) | ||
1320 | 153 | 164 | ||
1321 | 154 | def abort(self): | 165 | def abort(self): |
1322 | 155 | """Abort the current build.""" | 166 | """Abort the current build.""" |
1324 | 156 | return self._with_timeout(self._server.callRemote('abort')) | 167 | return self._server.callRemote('abort') |
1325 | 157 | 168 | ||
1326 | 158 | def clean(self): | 169 | def clean(self): |
1327 | 159 | """Clean up the waiting files and reset the slave's internal state.""" | 170 | """Clean up the waiting files and reset the slave's internal state.""" |
1329 | 160 | return self._with_timeout(self._server.callRemote('clean')) | 171 | return self._server.callRemote('clean') |
1330 | 161 | 172 | ||
1331 | 162 | def echo(self, *args): | 173 | def echo(self, *args): |
1332 | 163 | """Echo the arguments back.""" | 174 | """Echo the arguments back.""" |
1334 | 164 | return self._with_timeout(self._server.callRemote('echo', *args)) | 175 | return self._server.callRemote('echo', *args) |
1335 | 165 | 176 | ||
1336 | 166 | def info(self): | 177 | def info(self): |
1337 | 167 | """Return the protocol version and the builder methods supported.""" | 178 | """Return the protocol version and the builder methods supported.""" |
1339 | 168 | return self._with_timeout(self._server.callRemote('info')) | 179 | return self._server.callRemote('info') |
1340 | 169 | 180 | ||
1341 | 170 | def status(self): | 181 | def status(self): |
1342 | 171 | """Return the status of the build daemon.""" | 182 | """Return the status of the build daemon.""" |
1344 | 172 | return self._with_timeout(self._server.callRemote('status')) | 183 | return self._server.callRemote('status') |
1345 | 173 | 184 | ||
1346 | 174 | def ensurepresent(self, sha1sum, url, username, password): | 185 | def ensurepresent(self, sha1sum, url, username, password): |
1347 | 175 | # XXX: Nothing external calls this. Make it private. | ||
1348 | 176 | """Attempt to ensure the given file is present.""" | 186 | """Attempt to ensure the given file is present.""" |
1351 | 177 | return self._with_timeout(self._server.callRemote( | 187 | return self._server.callRemote( |
1352 | 178 | 'ensurepresent', sha1sum, url, username, password)) | 188 | 'ensurepresent', sha1sum, url, username, password) |
1353 | 179 | 189 | ||
1354 | 180 | def getFile(self, sha_sum): | 190 | def getFile(self, sha_sum): |
1355 | 181 | """Construct a file-like object to return the named file.""" | 191 | """Construct a file-like object to return the named file.""" |
1356 | 182 | # XXX 2010-10-18 bug=662631 | ||
1357 | 183 | # Change this to do non-blocking IO. | ||
1358 | 184 | file_url = urlappend(self._file_cache_url, sha_sum) | 192 | file_url = urlappend(self._file_cache_url, sha_sum) |
1359 | 185 | return urllib2.urlopen(file_url) | 193 | return urllib2.urlopen(file_url) |
1360 | 186 | 194 | ||
1372 | 187 | def resume(self, clock=None): | 195 | def resume(self): |
1373 | 188 | """Resume the builder in an asynchronous fashion. | 196 | """Resume a virtual builder. |
1374 | 189 | 197 | ||
1375 | 190 | We use the builddmaster configuration 'socket_timeout' as | 198 | It uses the configuration command-line (replacing 'vm_host') and |
1376 | 191 | the process timeout. | 199 | return its output. |
1377 | 192 | 200 | ||
1378 | 193 | :param clock: An optional twisted.internet.task.Clock to override | 201 | :return: a (stdout, stderr, subprocess exitcode) triple |
1368 | 194 | the default clock. For use in tests. | ||
1369 | 195 | |||
1370 | 196 | :return: a Deferred that returns a | ||
1371 | 197 | (stdout, stderr, subprocess exitcode) triple | ||
1379 | 198 | """ | 202 | """ |
1380 | 203 | # XXX: This executes the vm_resume_command | ||
1381 | 204 | # synchronously. RecordingSlave does so asynchronously. Since we | ||
1382 | 205 | # always want to do this asynchronously, there's no need for the | ||
1383 | 206 | # duplication. | ||
1384 | 199 | resume_command = config.builddmaster.vm_resume_command % { | 207 | resume_command = config.builddmaster.vm_resume_command % { |
1385 | 200 | 'vm_host': self._vm_host} | 208 | 'vm_host': self._vm_host} |
1393 | 201 | # Twisted API requires string but the configuration provides unicode. | 209 | resume_argv = resume_command.split() |
1394 | 202 | resume_argv = [term.encode('utf-8') for term in resume_command.split()] | 210 | resume_process = subprocess.Popen( |
1395 | 203 | d = defer.Deferred() | 211 | resume_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
1396 | 204 | p = ProcessWithTimeout( | 212 | stdout, stderr = resume_process.communicate() |
1397 | 205 | d, config.builddmaster.socket_timeout, clock=clock) | 213 | |
1398 | 206 | p.spawnProcess(resume_argv[0], tuple(resume_argv)) | 214 | return (stdout, stderr, resume_process.returncode) |
1392 | 207 | return d | ||
1399 | 208 | 215 | ||
1400 | 209 | def cacheFile(self, logger, libraryfilealias): | 216 | def cacheFile(self, logger, libraryfilealias): |
1401 | 210 | """Make sure that the file at 'libraryfilealias' is on the slave. | 217 | """Make sure that the file at 'libraryfilealias' is on the slave. |
1402 | @@ -217,15 +224,13 @@ | |||
1403 | 217 | "Asking builder on %s to ensure it has file %s (%s, %s)" % ( | 224 | "Asking builder on %s to ensure it has file %s (%s, %s)" % ( |
1404 | 218 | self._file_cache_url, libraryfilealias.filename, url, | 225 | self._file_cache_url, libraryfilealias.filename, url, |
1405 | 219 | libraryfilealias.content.sha1)) | 226 | libraryfilealias.content.sha1)) |
1407 | 220 | return self.sendFileToSlave(libraryfilealias.content.sha1, url) | 227 | self.sendFileToSlave(libraryfilealias.content.sha1, url) |
1408 | 221 | 228 | ||
1409 | 222 | def sendFileToSlave(self, sha1, url, username="", password=""): | 229 | def sendFileToSlave(self, sha1, url, username="", password=""): |
1410 | 223 | """Helper to send the file at 'url' with 'sha1' to this builder.""" | 230 | """Helper to send the file at 'url' with 'sha1' to this builder.""" |
1416 | 224 | d = self.ensurepresent(sha1, url, username, password) | 231 | present, info = self.ensurepresent(sha1, url, username, password) |
1417 | 225 | def check_present((present, info)): | 232 | if not present: |
1418 | 226 | if not present: | 233 | raise CannotFetchFile(url, info) |
1414 | 227 | raise CannotFetchFile(url, info) | ||
1415 | 228 | return d.addCallback(check_present) | ||
1419 | 229 | 234 | ||
1420 | 230 | def build(self, buildid, builder_type, chroot_sha1, filemap, args): | 235 | def build(self, buildid, builder_type, chroot_sha1, filemap, args): |
1421 | 231 | """Build a thing on this build slave. | 236 | """Build a thing on this build slave. |
1422 | @@ -238,18 +243,19 @@ | |||
1423 | 238 | :param args: A dictionary of extra arguments. The contents depend on | 243 | :param args: A dictionary of extra arguments. The contents depend on |
1424 | 239 | the build job type. | 244 | the build job type. |
1425 | 240 | """ | 245 | """ |
1432 | 241 | d = self._with_timeout(self._server.callRemote( | 246 | try: |
1433 | 242 | 'build', buildid, builder_type, chroot_sha1, filemap, args)) | 247 | return self._server.callRemote( |
1434 | 243 | def got_fault(failure): | 248 | 'build', buildid, builder_type, chroot_sha1, filemap, args) |
1435 | 244 | failure.trap(xmlrpclib.Fault) | 249 | except xmlrpclib.Fault, info: |
1436 | 245 | raise BuildSlaveFailure(failure.value) | 250 | raise BuildSlaveFailure(info) |
1431 | 246 | return d.addErrback(got_fault) | ||
1437 | 247 | 251 | ||
1438 | 248 | 252 | ||
1439 | 249 | # This is a separate function since MockBuilder needs to use it too. | 253 | # This is a separate function since MockBuilder needs to use it too. |
1440 | 250 | # Do not use it -- (Mock)Builder.rescueIfLost should be used instead. | 254 | # Do not use it -- (Mock)Builder.rescueIfLost should be used instead. |
1441 | 251 | def rescueBuilderIfLost(builder, logger=None): | 255 | def rescueBuilderIfLost(builder, logger=None): |
1442 | 252 | """See `IBuilder`.""" | 256 | """See `IBuilder`.""" |
1443 | 257 | status_sentence = builder.slaveStatusSentence() | ||
1444 | 258 | |||
1445 | 253 | # 'ident_position' dict relates the position of the job identifier | 259 | # 'ident_position' dict relates the position of the job identifier |
1446 | 254 | # token in the sentence received from status(), according the | 260 | # token in the sentence received from status(), according the |
1447 | 255 | # two status we care about. See see lib/canonical/buildd/slave.py | 261 | # two status we care about. See see lib/canonical/buildd/slave.py |
1448 | @@ -259,58 +265,61 @@ | |||
1449 | 259 | 'BuilderStatus.WAITING': 2 | 265 | 'BuilderStatus.WAITING': 2 |
1450 | 260 | } | 266 | } |
1451 | 261 | 267 | ||
1478 | 262 | d = builder.slaveStatusSentence() | 268 | # Isolate the BuilderStatus string, always the first token in |
1479 | 263 | 269 | # see lib/canonical/buildd/slave.py and | |
1480 | 264 | def got_status(status_sentence): | 270 | # IBuilder.slaveStatusSentence(). |
1481 | 265 | """After we get the status, clean if we have to. | 271 | status = status_sentence[0] |
1482 | 266 | 272 | ||
1483 | 267 | Always return status_sentence. | 273 | # If the cookie test below fails, it will request an abort of the |
1484 | 268 | """ | 274 | # builder. This will leave the builder in the aborted state and |
1485 | 269 | # Isolate the BuilderStatus string, always the first token in | 275 | # with no assigned job, and we should now "clean" the slave which |
1486 | 270 | # see lib/canonical/buildd/slave.py and | 276 | # will reset its state back to IDLE, ready to accept new builds. |
1487 | 271 | # IBuilder.slaveStatusSentence(). | 277 | # This situation is usually caused by a temporary loss of |
1488 | 272 | status = status_sentence[0] | 278 | # communications with the slave and the build manager had to reset |
1489 | 273 | 279 | # the job. | |
1490 | 274 | # If the cookie test below fails, it will request an abort of the | 280 | if status == 'BuilderStatus.ABORTED' and builder.currentjob is None: |
1491 | 275 | # builder. This will leave the builder in the aborted state and | 281 | builder.cleanSlave() |
1492 | 276 | # with no assigned job, and we should now "clean" the slave which | 282 | if logger is not None: |
1493 | 277 | # will reset its state back to IDLE, ready to accept new builds. | 283 | logger.info( |
1494 | 278 | # This situation is usually caused by a temporary loss of | 284 | "Builder '%s' cleaned up from ABORTED" % builder.name) |
1495 | 279 | # communications with the slave and the build manager had to reset | 285 | return |
1496 | 280 | # the job. | 286 | |
1497 | 281 | if status == 'BuilderStatus.ABORTED' and builder.currentjob is None: | 287 | # If slave is not building nor waiting, it's not in need of rescuing. |
1498 | 282 | if logger is not None: | 288 | if status not in ident_position.keys(): |
1499 | 283 | logger.info( | 289 | return |
1500 | 284 | "Builder '%s' being cleaned up from ABORTED" % | 290 | |
1501 | 285 | (builder.name,)) | 291 | slave_build_id = status_sentence[ident_position[status]] |
1502 | 286 | d = builder.cleanSlave() | 292 | |
1503 | 287 | return d.addCallback(lambda ignored: status_sentence) | 293 | try: |
1504 | 294 | builder.verifySlaveBuildCookie(slave_build_id) | ||
1505 | 295 | except CorruptBuildCookie, reason: | ||
1506 | 296 | if status == 'BuilderStatus.WAITING': | ||
1507 | 297 | builder.cleanSlave() | ||
1508 | 288 | else: | 298 | else: |
1534 | 289 | return status_sentence | 299 | builder.requestAbort() |
1535 | 290 | 300 | if logger: | |
1536 | 291 | def rescue_slave(status_sentence): | 301 | logger.info( |
1537 | 292 | # If slave is not building nor waiting, it's not in need of rescuing. | 302 | "Builder '%s' rescued from '%s': '%s'" % |
1538 | 293 | status = status_sentence[0] | 303 | (builder.name, slave_build_id, reason)) |
1539 | 294 | if status not in ident_position.keys(): | 304 | |
1540 | 295 | return | 305 | |
1541 | 296 | slave_build_id = status_sentence[ident_position[status]] | 306 | def _update_builder_status(builder, logger=None): |
1542 | 297 | try: | 307 | """Really update the builder status.""" |
1543 | 298 | builder.verifySlaveBuildCookie(slave_build_id) | 308 | try: |
1544 | 299 | except CorruptBuildCookie, reason: | 309 | builder.checkSlaveAlive() |
1545 | 300 | if status == 'BuilderStatus.WAITING': | 310 | builder.rescueIfLost(logger) |
1546 | 301 | d = builder.cleanSlave() | 311 | # Catch only known exceptions. |
1547 | 302 | else: | 312 | # XXX cprov 2007-06-15 bug=120571: ValueError & TypeError catching is |
1548 | 303 | d = builder.requestAbort() | 313 | # disturbing in this context. We should spend sometime sanitizing the |
1549 | 304 | def log_rescue(ignored): | 314 | # exceptions raised in the Builder API since we already started the |
1550 | 305 | if logger: | 315 | # main refactoring of this area. |
1551 | 306 | logger.info( | 316 | except (ValueError, TypeError, xmlrpclib.Fault, |
1552 | 307 | "Builder '%s' rescued from '%s': '%s'" % | 317 | BuildDaemonError), reason: |
1553 | 308 | (builder.name, slave_build_id, reason)) | 318 | builder.failBuilder(str(reason)) |
1554 | 309 | return d.addCallback(log_rescue) | 319 | if logger: |
1555 | 310 | 320 | logger.warn( | |
1556 | 311 | d.addCallback(got_status) | 321 | "%s (%s) marked as failed due to: %s", |
1557 | 312 | d.addCallback(rescue_slave) | 322 | builder.name, builder.url, builder.failnotes, exc_info=True) |
1533 | 313 | return d | ||
1558 | 314 | 323 | ||
1559 | 315 | 324 | ||
1560 | 316 | def updateBuilderStatus(builder, logger=None): | 325 | def updateBuilderStatus(builder, logger=None): |
1561 | @@ -318,7 +327,16 @@ | |||
1562 | 318 | if logger: | 327 | if logger: |
1563 | 319 | logger.debug('Checking %s' % builder.name) | 328 | logger.debug('Checking %s' % builder.name) |
1564 | 320 | 329 | ||
1566 | 321 | return builder.rescueIfLost(logger) | 330 | MAX_EINTR_RETRIES = 42 # pulling a number out of my a$$ here |
1567 | 331 | try: | ||
1568 | 332 | return until_no_eintr( | ||
1569 | 333 | MAX_EINTR_RETRIES, _update_builder_status, builder, logger=logger) | ||
1570 | 334 | except socket.error, reason: | ||
1571 | 335 | # In Python 2.6 we can use IOError instead. It also has | ||
1572 | 336 | # reason.errno but we might be using 2.5 here so use the | ||
1573 | 337 | # index hack. | ||
1574 | 338 | error_message = str(reason) | ||
1575 | 339 | builder.handleTimeout(logger, error_message) | ||
1576 | 322 | 340 | ||
1577 | 323 | 341 | ||
1578 | 324 | class Builder(SQLBase): | 342 | class Builder(SQLBase): |
1579 | @@ -346,10 +364,6 @@ | |||
1580 | 346 | active = BoolCol(dbName='active', notNull=True, default=True) | 364 | active = BoolCol(dbName='active', notNull=True, default=True) |
1581 | 347 | failure_count = IntCol(dbName='failure_count', default=0, notNull=True) | 365 | failure_count = IntCol(dbName='failure_count', default=0, notNull=True) |
1582 | 348 | 366 | ||
1583 | 349 | # The number of times a builder can consecutively fail before we | ||
1584 | 350 | # give up and mark it builderok=False. | ||
1585 | 351 | FAILURE_THRESHOLD = 5 | ||
1586 | 352 | |||
1587 | 353 | def _getCurrentBuildBehavior(self): | 367 | def _getCurrentBuildBehavior(self): |
1588 | 354 | """Return the current build behavior.""" | 368 | """Return the current build behavior.""" |
1589 | 355 | if not safe_hasattr(self, '_current_build_behavior'): | 369 | if not safe_hasattr(self, '_current_build_behavior'): |
1590 | @@ -395,13 +409,18 @@ | |||
1591 | 395 | """See `IBuilder`.""" | 409 | """See `IBuilder`.""" |
1592 | 396 | self.failure_count = 0 | 410 | self.failure_count = 0 |
1593 | 397 | 411 | ||
1594 | 412 | def checkSlaveAlive(self): | ||
1595 | 413 | """See IBuilder.""" | ||
1596 | 414 | if self.slave.echo("Test")[0] != "Test": | ||
1597 | 415 | raise BuildDaemonError("Failed to echo OK") | ||
1598 | 416 | |||
1599 | 398 | def rescueIfLost(self, logger=None): | 417 | def rescueIfLost(self, logger=None): |
1600 | 399 | """See `IBuilder`.""" | 418 | """See `IBuilder`.""" |
1602 | 400 | return rescueBuilderIfLost(self, logger) | 419 | rescueBuilderIfLost(self, logger) |
1603 | 401 | 420 | ||
1604 | 402 | def updateStatus(self, logger=None): | 421 | def updateStatus(self, logger=None): |
1605 | 403 | """See `IBuilder`.""" | 422 | """See `IBuilder`.""" |
1607 | 404 | return updateBuilderStatus(self, logger) | 423 | updateBuilderStatus(self, logger) |
1608 | 405 | 424 | ||
1609 | 406 | def cleanSlave(self): | 425 | def cleanSlave(self): |
1610 | 407 | """See IBuilder.""" | 426 | """See IBuilder.""" |
1611 | @@ -421,23 +440,20 @@ | |||
1612 | 421 | def resumeSlaveHost(self): | 440 | def resumeSlaveHost(self): |
1613 | 422 | """See IBuilder.""" | 441 | """See IBuilder.""" |
1614 | 423 | if not self.virtualized: | 442 | if not self.virtualized: |
1616 | 424 | return defer.fail(CannotResumeHost('Builder is not virtualized.')) | 443 | raise CannotResumeHost('Builder is not virtualized.') |
1617 | 425 | 444 | ||
1618 | 426 | if not self.vm_host: | 445 | if not self.vm_host: |
1620 | 427 | return defer.fail(CannotResumeHost('Undefined vm_host.')) | 446 | raise CannotResumeHost('Undefined vm_host.') |
1621 | 428 | 447 | ||
1622 | 429 | logger = self._getSlaveScannerLogger() | 448 | logger = self._getSlaveScannerLogger() |
1623 | 430 | logger.debug("Resuming %s (%s)" % (self.name, self.url)) | 449 | logger.debug("Resuming %s (%s)" % (self.name, self.url)) |
1624 | 431 | 450 | ||
1630 | 432 | d = self.slave.resume() | 451 | stdout, stderr, returncode = self.slave.resume() |
1631 | 433 | def got_resume_ok((stdout, stderr, returncode)): | 452 | if returncode != 0: |
1627 | 434 | return stdout, stderr | ||
1628 | 435 | def got_resume_bad(failure): | ||
1629 | 436 | stdout, stderr, code = failure.value | ||
1632 | 437 | raise CannotResumeHost( | 453 | raise CannotResumeHost( |
1633 | 438 | "Resuming failed:\nOUT:\n%s\nERR:\n%s\n" % (stdout, stderr)) | 454 | "Resuming failed:\nOUT:\n%s\nERR:\n%s\n" % (stdout, stderr)) |
1634 | 439 | 455 | ||
1636 | 440 | return d.addCallback(got_resume_ok).addErrback(got_resume_bad) | 456 | return stdout, stderr |
1637 | 441 | 457 | ||
1638 | 442 | @cachedproperty | 458 | @cachedproperty |
1639 | 443 | def slave(self): | 459 | def slave(self): |
1640 | @@ -446,7 +462,7 @@ | |||
1641 | 446 | # the slave object, which is usually an XMLRPC client, with a | 462 | # the slave object, which is usually an XMLRPC client, with a |
1642 | 447 | # stub object that removes the need to actually create a buildd | 463 | # stub object that removes the need to actually create a buildd |
1643 | 448 | # slave in various states - which can be hard to create. | 464 | # slave in various states - which can be hard to create. |
1645 | 449 | return BuilderSlave.makeBuilderSlave(self.url, self.vm_host) | 465 | return BuilderSlave.makeBlockingSlave(self.url, self.vm_host) |
1646 | 450 | 466 | ||
1647 | 451 | def setSlaveForTesting(self, proxy): | 467 | def setSlaveForTesting(self, proxy): |
1648 | 452 | """See IBuilder.""" | 468 | """See IBuilder.""" |
1649 | @@ -467,23 +483,18 @@ | |||
1650 | 467 | 483 | ||
1651 | 468 | # If we are building a virtual build, resume the virtual machine. | 484 | # If we are building a virtual build, resume the virtual machine. |
1652 | 469 | if self.virtualized: | 485 | if self.virtualized: |
1656 | 470 | d = self.resumeSlaveHost() | 486 | self.resumeSlaveHost() |
1654 | 471 | else: | ||
1655 | 472 | d = defer.succeed(None) | ||
1657 | 473 | 487 | ||
1660 | 474 | def resume_done(ignored): | 488 | # Do it. |
1661 | 475 | return self.current_build_behavior.dispatchBuildToSlave( | 489 | build_queue_item.markAsBuilding(self) |
1662 | 490 | try: | ||
1663 | 491 | self.current_build_behavior.dispatchBuildToSlave( | ||
1664 | 476 | build_queue_item.id, logger) | 492 | build_queue_item.id, logger) |
1669 | 477 | 493 | except BuildSlaveFailure, e: | |
1670 | 478 | def eb_slave_failure(failure): | 494 | logger.debug("Disabling builder: %s" % self.url, exc_info=1) |
1667 | 479 | failure.trap(BuildSlaveFailure) | ||
1668 | 480 | e = failure.value | ||
1671 | 481 | self.failBuilder( | 495 | self.failBuilder( |
1672 | 482 | "Exception (%s) when setting up to new job" % (e,)) | 496 | "Exception (%s) when setting up to new job" % (e,)) |
1677 | 483 | 497 | except CannotFetchFile, e: | |
1674 | 484 | def eb_cannot_fetch_file(failure): | ||
1675 | 485 | failure.trap(CannotFetchFile) | ||
1676 | 486 | e = failure.value | ||
1678 | 487 | message = """Slave '%s' (%s) was unable to fetch file. | 498 | message = """Slave '%s' (%s) was unable to fetch file. |
1679 | 488 | ****** URL ******** | 499 | ****** URL ******** |
1680 | 489 | %s | 500 | %s |
1681 | @@ -492,19 +503,10 @@ | |||
1682 | 492 | ******************* | 503 | ******************* |
1683 | 493 | """ % (self.name, self.url, e.file_url, e.error_information) | 504 | """ % (self.name, self.url, e.file_url, e.error_information) |
1684 | 494 | raise BuildDaemonError(message) | 505 | raise BuildDaemonError(message) |
1689 | 495 | 506 | except socket.error, e: | |
1686 | 496 | def eb_socket_error(failure): | ||
1687 | 497 | failure.trap(socket.error) | ||
1688 | 498 | e = failure.value | ||
1690 | 499 | error_message = "Exception (%s) when setting up new job" % (e,) | 507 | error_message = "Exception (%s) when setting up new job" % (e,) |
1699 | 500 | d = self.handleTimeout(logger, error_message) | 508 | self.handleTimeout(logger, error_message) |
1700 | 501 | return d.addBoth(lambda ignored: failure) | 509 | raise BuildSlaveFailure |
1693 | 502 | |||
1694 | 503 | d.addCallback(resume_done) | ||
1695 | 504 | d.addErrback(eb_slave_failure) | ||
1696 | 505 | d.addErrback(eb_cannot_fetch_file) | ||
1697 | 506 | d.addErrback(eb_socket_error) | ||
1698 | 507 | return d | ||
1701 | 508 | 510 | ||
1702 | 509 | def failBuilder(self, reason): | 511 | def failBuilder(self, reason): |
1703 | 510 | """See IBuilder""" | 512 | """See IBuilder""" |
1704 | @@ -532,24 +534,22 @@ | |||
1705 | 532 | 534 | ||
1706 | 533 | def slaveStatus(self): | 535 | def slaveStatus(self): |
1707 | 534 | """See IBuilder.""" | 536 | """See IBuilder.""" |
1726 | 535 | d = self.slave.status() | 537 | builder_version, builder_arch, mechanisms = self.slave.info() |
1727 | 536 | def got_status(status_sentence): | 538 | status_sentence = self.slave.status() |
1728 | 537 | status = {'builder_status': status_sentence[0]} | 539 | |
1729 | 538 | 540 | status = {'builder_status': status_sentence[0]} | |
1730 | 539 | # Extract detailed status and log information if present. | 541 | |
1731 | 540 | # Although build_id is also easily extractable here, there is no | 542 | # Extract detailed status and log information if present. |
1732 | 541 | # valid reason for anything to use it, so we exclude it. | 543 | # Although build_id is also easily extractable here, there is no |
1733 | 542 | if status['builder_status'] == 'BuilderStatus.WAITING': | 544 | # valid reason for anything to use it, so we exclude it. |
1734 | 543 | status['build_status'] = status_sentence[1] | 545 | if status['builder_status'] == 'BuilderStatus.WAITING': |
1735 | 544 | else: | 546 | status['build_status'] = status_sentence[1] |
1736 | 545 | if status['builder_status'] == 'BuilderStatus.BUILDING': | 547 | else: |
1737 | 546 | status['logtail'] = status_sentence[2] | 548 | if status['builder_status'] == 'BuilderStatus.BUILDING': |
1738 | 547 | 549 | status['logtail'] = status_sentence[2] | |
1739 | 548 | self.current_build_behavior.updateSlaveStatus( | 550 | |
1740 | 549 | status_sentence, status) | 551 | self.current_build_behavior.updateSlaveStatus(status_sentence, status) |
1741 | 550 | return status | 552 | return status |
1724 | 551 | |||
1725 | 552 | return d.addCallback(got_status) | ||
1742 | 553 | 553 | ||
1743 | 554 | def slaveStatusSentence(self): | 554 | def slaveStatusSentence(self): |
1744 | 555 | """See IBuilder.""" | 555 | """See IBuilder.""" |
1745 | @@ -562,15 +562,13 @@ | |||
1746 | 562 | 562 | ||
1747 | 563 | def updateBuild(self, queueItem): | 563 | def updateBuild(self, queueItem): |
1748 | 564 | """See `IBuilder`.""" | 564 | """See `IBuilder`.""" |
1750 | 565 | return self.current_build_behavior.updateBuild(queueItem) | 565 | self.current_build_behavior.updateBuild(queueItem) |
1751 | 566 | 566 | ||
1752 | 567 | def transferSlaveFileToLibrarian(self, file_sha1, filename, private): | 567 | def transferSlaveFileToLibrarian(self, file_sha1, filename, private): |
1753 | 568 | """See IBuilder.""" | 568 | """See IBuilder.""" |
1754 | 569 | out_file_fd, out_file_name = tempfile.mkstemp(suffix=".buildlog") | 569 | out_file_fd, out_file_name = tempfile.mkstemp(suffix=".buildlog") |
1755 | 570 | out_file = os.fdopen(out_file_fd, "r+") | 570 | out_file = os.fdopen(out_file_fd, "r+") |
1756 | 571 | try: | 571 | try: |
1757 | 572 | # XXX 2010-10-18 bug=662631 | ||
1758 | 573 | # Change this to do non-blocking IO. | ||
1759 | 574 | slave_file = self.slave.getFile(file_sha1) | 572 | slave_file = self.slave.getFile(file_sha1) |
1760 | 575 | copy_and_close(slave_file, out_file) | 573 | copy_and_close(slave_file, out_file) |
1761 | 576 | # If the requested file is the 'buildlog' compress it using gzip | 574 | # If the requested file is the 'buildlog' compress it using gzip |
1762 | @@ -601,17 +599,18 @@ | |||
1763 | 601 | 599 | ||
1764 | 602 | return library_file.id | 600 | return library_file.id |
1765 | 603 | 601 | ||
1767 | 604 | def isAvailable(self): | 602 | @property |
1768 | 603 | def is_available(self): | ||
1769 | 605 | """See `IBuilder`.""" | 604 | """See `IBuilder`.""" |
1770 | 606 | if not self.builderok: | 605 | if not self.builderok: |
1779 | 607 | return defer.succeed(False) | 606 | return False |
1780 | 608 | d = self.slaveStatusSentence() | 607 | try: |
1781 | 609 | def catch_fault(failure): | 608 | slavestatus = self.slaveStatusSentence() |
1782 | 610 | failure.trap(xmlrpclib.Fault, socket.error) | 609 | except (xmlrpclib.Fault, socket.error): |
1783 | 611 | return False | 610 | return False |
1784 | 612 | def check_available(status): | 611 | if slavestatus[0] != BuilderStatus.IDLE: |
1785 | 613 | return status[0] == BuilderStatus.IDLE | 612 | return False |
1786 | 614 | return d.addCallbacks(check_available, catch_fault) | 613 | return True |
1787 | 615 | 614 | ||
1788 | 616 | def _getSlaveScannerLogger(self): | 615 | def _getSlaveScannerLogger(self): |
1789 | 617 | """Return the logger instance from buildd-slave-scanner.py.""" | 616 | """Return the logger instance from buildd-slave-scanner.py.""" |
1790 | @@ -622,27 +621,6 @@ | |||
1791 | 622 | logger = logging.getLogger('slave-scanner') | 621 | logger = logging.getLogger('slave-scanner') |
1792 | 623 | return logger | 622 | return logger |
1793 | 624 | 623 | ||
1794 | 625 | def acquireBuildCandidate(self): | ||
1795 | 626 | """Acquire a build candidate in an atomic fashion. | ||
1796 | 627 | |||
1797 | 628 | When retrieiving a candidate we need to mark it as building | ||
1798 | 629 | immediately so that it is not dispatched by another builder in the | ||
1799 | 630 | build manager. | ||
1800 | 631 | |||
1801 | 632 | We can consider this to be atomic because although the build manager | ||
1802 | 633 | is a Twisted app and gives the appearance of doing lots of things at | ||
1803 | 634 | once, it's still single-threaded so no more than one builder scan | ||
1804 | 635 | can be in this code at the same time. | ||
1805 | 636 | |||
1806 | 637 | If there's ever more than one build manager running at once, then | ||
1807 | 638 | this code will need some sort of mutex. | ||
1808 | 639 | """ | ||
1809 | 640 | candidate = self._findBuildCandidate() | ||
1810 | 641 | if candidate is not None: | ||
1811 | 642 | candidate.markAsBuilding(self) | ||
1812 | 643 | transaction.commit() | ||
1813 | 644 | return candidate | ||
1814 | 645 | |||
1815 | 646 | def _findBuildCandidate(self): | 624 | def _findBuildCandidate(self): |
1816 | 647 | """Find a candidate job for dispatch to an idle buildd slave. | 625 | """Find a candidate job for dispatch to an idle buildd slave. |
1817 | 648 | 626 | ||
1818 | @@ -722,46 +700,52 @@ | |||
1819 | 722 | :param candidate: The job to dispatch. | 700 | :param candidate: The job to dispatch. |
1820 | 723 | """ | 701 | """ |
1821 | 724 | logger = self._getSlaveScannerLogger() | 702 | logger = self._getSlaveScannerLogger() |
1826 | 725 | # Using maybeDeferred ensures that any exceptions are also | 703 | try: |
1827 | 726 | # wrapped up and caught later. | 704 | self.startBuild(candidate, logger) |
1828 | 727 | d = defer.maybeDeferred(self.startBuild, candidate, logger) | 705 | except (BuildSlaveFailure, CannotBuild, BuildBehaviorMismatch), err: |
1829 | 728 | return d | 706 | logger.warn('Could not build: %s' % err) |
1830 | 729 | 707 | ||
1831 | 730 | def handleTimeout(self, logger, error_message): | 708 | def handleTimeout(self, logger, error_message): |
1832 | 731 | """See IBuilder.""" | 709 | """See IBuilder.""" |
1833 | 710 | builder_should_be_failed = True | ||
1834 | 711 | |||
1835 | 732 | if self.virtualized: | 712 | if self.virtualized: |
1836 | 733 | # Virtualized/PPA builder: attempt a reset. | 713 | # Virtualized/PPA builder: attempt a reset. |
1837 | 734 | logger.warn( | 714 | logger.warn( |
1838 | 735 | "Resetting builder: %s -- %s" % (self.url, error_message), | 715 | "Resetting builder: %s -- %s" % (self.url, error_message), |
1839 | 736 | exc_info=True) | 716 | exc_info=True) |
1845 | 737 | d = self.resumeSlaveHost() | 717 | try: |
1846 | 738 | return d | 718 | self.resumeSlaveHost() |
1847 | 739 | else: | 719 | except CannotResumeHost, err: |
1848 | 740 | # XXX: This should really let the failure bubble up to the | 720 | # Failed to reset builder. |
1849 | 741 | # scan() method that does the failure counting. | 721 | logger.warn( |
1850 | 722 | "Failed to reset builder: %s -- %s" % | ||
1851 | 723 | (self.url, str(err)), exc_info=True) | ||
1852 | 724 | else: | ||
1853 | 725 | # Builder was reset, do *not* mark it as failed. | ||
1854 | 726 | builder_should_be_failed = False | ||
1855 | 727 | |||
1856 | 728 | if builder_should_be_failed: | ||
1857 | 742 | # Mark builder as 'failed'. | 729 | # Mark builder as 'failed'. |
1858 | 743 | logger.warn( | 730 | logger.warn( |
1860 | 744 | "Disabling builder: %s -- %s" % (self.url, error_message)) | 731 | "Disabling builder: %s -- %s" % (self.url, error_message), |
1861 | 732 | exc_info=True) | ||
1862 | 745 | self.failBuilder(error_message) | 733 | self.failBuilder(error_message) |
1863 | 746 | return defer.succeed(None) | ||
1864 | 747 | 734 | ||
1865 | 748 | def findAndStartJob(self, buildd_slave=None): | 735 | def findAndStartJob(self, buildd_slave=None): |
1866 | 749 | """See IBuilder.""" | 736 | """See IBuilder.""" |
1867 | 750 | # XXX This method should be removed in favour of two separately | ||
1868 | 751 | # called methods that find and dispatch the job. It will | ||
1869 | 752 | # require a lot of test fixing. | ||
1870 | 753 | logger = self._getSlaveScannerLogger() | 737 | logger = self._getSlaveScannerLogger() |
1872 | 754 | candidate = self.acquireBuildCandidate() | 738 | candidate = self._findBuildCandidate() |
1873 | 755 | 739 | ||
1874 | 756 | if candidate is None: | 740 | if candidate is None: |
1875 | 757 | logger.debug("No build candidates available for builder.") | 741 | logger.debug("No build candidates available for builder.") |
1877 | 758 | return defer.succeed(None) | 742 | return None |
1878 | 759 | 743 | ||
1879 | 760 | if buildd_slave is not None: | 744 | if buildd_slave is not None: |
1880 | 761 | self.setSlaveForTesting(buildd_slave) | 745 | self.setSlaveForTesting(buildd_slave) |
1881 | 762 | 746 | ||
1884 | 763 | d = self._dispatchBuildCandidate(candidate) | 747 | self._dispatchBuildCandidate(candidate) |
1885 | 764 | return d.addCallback(lambda ignored: candidate) | 748 | return candidate |
1886 | 765 | 749 | ||
1887 | 766 | def getBuildQueue(self): | 750 | def getBuildQueue(self): |
1888 | 767 | """See `IBuilder`.""" | 751 | """See `IBuilder`.""" |
1889 | 768 | 752 | ||
1890 | === modified file 'lib/lp/buildmaster/model/buildfarmjobbehavior.py' | |||
1891 | --- lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-10-20 11:54:27 +0000 | |||
1892 | +++ lib/lp/buildmaster/model/buildfarmjobbehavior.py 2010-12-07 16:24:04 +0000 | |||
1893 | @@ -16,18 +16,13 @@ | |||
1894 | 16 | import socket | 16 | import socket |
1895 | 17 | import xmlrpclib | 17 | import xmlrpclib |
1896 | 18 | 18 | ||
1897 | 19 | from twisted.internet import defer | ||
1898 | 20 | |||
1899 | 21 | from zope.component import getUtility | 19 | from zope.component import getUtility |
1900 | 22 | from zope.interface import implements | 20 | from zope.interface import implements |
1901 | 23 | from zope.security.proxy import removeSecurityProxy | 21 | from zope.security.proxy import removeSecurityProxy |
1902 | 24 | 22 | ||
1903 | 25 | from canonical import encoding | 23 | from canonical import encoding |
1904 | 26 | from canonical.librarian.interfaces import ILibrarianClient | 24 | from canonical.librarian.interfaces import ILibrarianClient |
1909 | 27 | from lp.buildmaster.interfaces.builder import ( | 25 | from lp.buildmaster.interfaces.builder import CorruptBuildCookie |
1906 | 28 | BuildSlaveFailure, | ||
1907 | 29 | CorruptBuildCookie, | ||
1908 | 30 | ) | ||
1910 | 31 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( | 26 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( |
1911 | 32 | BuildBehaviorMismatch, | 27 | BuildBehaviorMismatch, |
1912 | 33 | IBuildFarmJobBehavior, | 28 | IBuildFarmJobBehavior, |
1913 | @@ -74,53 +69,54 @@ | |||
1914 | 74 | """See `IBuildFarmJobBehavior`.""" | 69 | """See `IBuildFarmJobBehavior`.""" |
1915 | 75 | logger = logging.getLogger('slave-scanner') | 70 | logger = logging.getLogger('slave-scanner') |
1916 | 76 | 71 | ||
1922 | 77 | d = self._builder.slaveStatus() | 72 | try: |
1923 | 78 | 73 | slave_status = self._builder.slaveStatus() | |
1924 | 79 | def got_failure(failure): | 74 | except (xmlrpclib.Fault, socket.error), info: |
1925 | 80 | failure.trap(xmlrpclib.Fault, socket.error) | 75 | # XXX cprov 2005-06-29: |
1926 | 81 | info = failure.value | 76 | # Hmm, a problem with the xmlrpc interface, |
1927 | 77 | # disable the builder ?? or simple notice the failure | ||
1928 | 78 | # with a timestamp. | ||
1929 | 82 | info = ("Could not contact the builder %s, caught a (%s)" | 79 | info = ("Could not contact the builder %s, caught a (%s)" |
1930 | 83 | % (queueItem.builder.url, info)) | 80 | % (queueItem.builder.url, info)) |
1971 | 84 | raise BuildSlaveFailure(info) | 81 | logger.debug(info, exc_info=True) |
1972 | 85 | 82 | # keep the job for scan | |
1973 | 86 | def got_status(slave_status): | 83 | return |
1974 | 87 | builder_status_handlers = { | 84 | |
1975 | 88 | 'BuilderStatus.IDLE': self.updateBuild_IDLE, | 85 | builder_status_handlers = { |
1976 | 89 | 'BuilderStatus.BUILDING': self.updateBuild_BUILDING, | 86 | 'BuilderStatus.IDLE': self.updateBuild_IDLE, |
1977 | 90 | 'BuilderStatus.ABORTING': self.updateBuild_ABORTING, | 87 | 'BuilderStatus.BUILDING': self.updateBuild_BUILDING, |
1978 | 91 | 'BuilderStatus.ABORTED': self.updateBuild_ABORTED, | 88 | 'BuilderStatus.ABORTING': self.updateBuild_ABORTING, |
1979 | 92 | 'BuilderStatus.WAITING': self.updateBuild_WAITING, | 89 | 'BuilderStatus.ABORTED': self.updateBuild_ABORTED, |
1980 | 93 | } | 90 | 'BuilderStatus.WAITING': self.updateBuild_WAITING, |
1981 | 94 | 91 | } | |
1982 | 95 | builder_status = slave_status['builder_status'] | 92 | |
1983 | 96 | if builder_status not in builder_status_handlers: | 93 | builder_status = slave_status['builder_status'] |
1984 | 97 | logger.critical( | 94 | if builder_status not in builder_status_handlers: |
1985 | 98 | "Builder on %s returned unknown status %s, failing it" | 95 | logger.critical( |
1986 | 99 | % (self._builder.url, builder_status)) | 96 | "Builder on %s returned unknown status %s, failing it" |
1987 | 100 | self._builder.failBuilder( | 97 | % (self._builder.url, builder_status)) |
1988 | 101 | "Unknown status code (%s) returned from status() probe." | 98 | self._builder.failBuilder( |
1989 | 102 | % builder_status) | 99 | "Unknown status code (%s) returned from status() probe." |
1990 | 103 | # XXX: This will leave the build and job in a bad state, but | 100 | % builder_status) |
1991 | 104 | # should never be possible, since our builder statuses are | 101 | # XXX: This will leave the build and job in a bad state, but |
1992 | 105 | # known. | 102 | # should never be possible, since our builder statuses are |
1993 | 106 | queueItem._builder = None | 103 | # known. |
1994 | 107 | queueItem.setDateStarted(None) | 104 | queueItem._builder = None |
1995 | 108 | return | 105 | queueItem.setDateStarted(None) |
1996 | 109 | 106 | return | |
1997 | 110 | # Since logtail is a xmlrpclib.Binary container and it is | 107 | |
1998 | 111 | # returned from the IBuilder content class, it arrives | 108 | # Since logtail is a xmlrpclib.Binary container and it is returned |
1999 | 112 | # protected by a Zope Security Proxy, which is not declared, | 109 | # from the IBuilder content class, it arrives protected by a Zope |
2000 | 113 | # thus empty. Before passing it to the status handlers we | 110 | # Security Proxy, which is not declared, thus empty. Before passing |
2001 | 114 | # will simply remove the proxy. | 111 | # it to the status handlers we will simply remove the proxy. |
2002 | 115 | logtail = removeSecurityProxy(slave_status.get('logtail')) | 112 | logtail = removeSecurityProxy(slave_status.get('logtail')) |
2003 | 116 | 113 | ||
2004 | 117 | method = builder_status_handlers[builder_status] | 114 | method = builder_status_handlers[builder_status] |
2005 | 118 | return defer.maybeDeferred( | 115 | try: |
2006 | 119 | method, queueItem, slave_status, logtail, logger) | 116 | method(queueItem, slave_status, logtail, logger) |
2007 | 120 | 117 | except TypeError, e: | |
2008 | 121 | d.addErrback(got_failure) | 118 | logger.critical("Received wrong number of args in response.") |
2009 | 122 | d.addCallback(got_status) | 119 | logger.exception(e) |
1970 | 123 | return d | ||
2010 | 124 | 120 | ||
2011 | 125 | def updateBuild_IDLE(self, queueItem, slave_status, logtail, logger): | 121 | def updateBuild_IDLE(self, queueItem, slave_status, logtail, logger): |
2012 | 126 | """Somehow the builder forgot about the build job. | 122 | """Somehow the builder forgot about the build job. |
2013 | @@ -150,13 +146,11 @@ | |||
2014 | 150 | 146 | ||
2015 | 151 | Clean the builder for another jobs. | 147 | Clean the builder for another jobs. |
2016 | 152 | """ | 148 | """ |
2024 | 153 | d = queueItem.builder.cleanSlave() | 149 | queueItem.builder.cleanSlave() |
2025 | 154 | def got_cleaned(ignored): | 150 | queueItem.builder = None |
2026 | 155 | queueItem.builder = None | 151 | if queueItem.job.status != JobStatus.FAILED: |
2027 | 156 | if queueItem.job.status != JobStatus.FAILED: | 152 | queueItem.job.fail() |
2028 | 157 | queueItem.job.fail() | 153 | queueItem.specific_job.jobAborted() |
2022 | 158 | queueItem.specific_job.jobAborted() | ||
2023 | 159 | return d.addCallback(got_cleaned) | ||
2029 | 160 | 154 | ||
2030 | 161 | def extractBuildStatus(self, slave_status): | 155 | def extractBuildStatus(self, slave_status): |
2031 | 162 | """Read build status name. | 156 | """Read build status name. |
2032 | @@ -191,8 +185,6 @@ | |||
2033 | 191 | # XXX: dsilvers 2005-03-02: Confirm the builder has the right build? | 185 | # XXX: dsilvers 2005-03-02: Confirm the builder has the right build? |
2034 | 192 | 186 | ||
2035 | 193 | build = queueItem.specific_job.build | 187 | build = queueItem.specific_job.build |
2036 | 194 | # XXX 2010-10-18 bug=662631 | ||
2037 | 195 | # Change this to do non-blocking IO. | ||
2038 | 196 | build.handleStatus(build_status, librarian, slave_status) | 188 | build.handleStatus(build_status, librarian, slave_status) |
2039 | 197 | 189 | ||
2040 | 198 | 190 | ||
2041 | 199 | 191 | ||
2042 | === modified file 'lib/lp/buildmaster/model/packagebuild.py' | |||
2043 | --- lib/lp/buildmaster/model/packagebuild.py 2010-10-26 20:43:50 +0000 | |||
2044 | +++ lib/lp/buildmaster/model/packagebuild.py 2010-12-07 16:24:04 +0000 | |||
2045 | @@ -163,8 +163,6 @@ | |||
2046 | 163 | def getLogFromSlave(package_build): | 163 | def getLogFromSlave(package_build): |
2047 | 164 | """See `IPackageBuild`.""" | 164 | """See `IPackageBuild`.""" |
2048 | 165 | builder = package_build.buildqueue_record.builder | 165 | builder = package_build.buildqueue_record.builder |
2049 | 166 | # XXX 2010-10-18 bug=662631 | ||
2050 | 167 | # Change this to do non-blocking IO. | ||
2051 | 168 | return builder.transferSlaveFileToLibrarian( | 166 | return builder.transferSlaveFileToLibrarian( |
2052 | 169 | SLAVE_LOG_FILENAME, | 167 | SLAVE_LOG_FILENAME, |
2053 | 170 | package_build.buildqueue_record.getLogFileName(), | 168 | package_build.buildqueue_record.getLogFileName(), |
2054 | @@ -180,8 +178,6 @@ | |||
2055 | 180 | # log, builder and date_finished are read-only, so we must | 178 | # log, builder and date_finished are read-only, so we must |
2056 | 181 | # currently remove the security proxy to set them. | 179 | # currently remove the security proxy to set them. |
2057 | 182 | naked_build = removeSecurityProxy(build) | 180 | naked_build = removeSecurityProxy(build) |
2058 | 183 | # XXX 2010-10-18 bug=662631 | ||
2059 | 184 | # Change this to do non-blocking IO. | ||
2060 | 185 | naked_build.log = build.getLogFromSlave(build) | 181 | naked_build.log = build.getLogFromSlave(build) |
2061 | 186 | naked_build.builder = build.buildqueue_record.builder | 182 | naked_build.builder = build.buildqueue_record.builder |
2062 | 187 | # XXX cprov 20060615 bug=120584: Currently buildduration includes | 183 | # XXX cprov 20060615 bug=120584: Currently buildduration includes |
2063 | @@ -278,8 +274,6 @@ | |||
2064 | 278 | logger.critical("Unknown BuildStatus '%s' for builder '%s'" | 274 | logger.critical("Unknown BuildStatus '%s' for builder '%s'" |
2065 | 279 | % (status, self.buildqueue_record.builder.url)) | 275 | % (status, self.buildqueue_record.builder.url)) |
2066 | 280 | return | 276 | return |
2067 | 281 | # XXX 2010-10-18 bug=662631 | ||
2068 | 282 | # Change this to do non-blocking IO. | ||
2069 | 283 | method(librarian, slave_status, logger) | 277 | method(librarian, slave_status, logger) |
2070 | 284 | 278 | ||
2071 | 285 | def _handleStatus_OK(self, librarian, slave_status, logger): | 279 | def _handleStatus_OK(self, librarian, slave_status, logger): |
2072 | 286 | 280 | ||
2073 | === modified file 'lib/lp/buildmaster/tests/mock_slaves.py' | |||
2074 | --- lib/lp/buildmaster/tests/mock_slaves.py 2010-10-14 15:37:56 +0000 | |||
2075 | +++ lib/lp/buildmaster/tests/mock_slaves.py 2010-12-07 16:24:04 +0000 | |||
2076 | @@ -6,40 +6,21 @@ | |||
2077 | 6 | __metaclass__ = type | 6 | __metaclass__ = type |
2078 | 7 | 7 | ||
2079 | 8 | __all__ = [ | 8 | __all__ = [ |
2082 | 9 | 'AbortedSlave', | 9 | 'MockBuilder', |
2083 | 10 | 'AbortingSlave', | 10 | 'LostBuildingBrokenSlave', |
2084 | 11 | 'BrokenSlave', | 11 | 'BrokenSlave', |
2085 | 12 | 'OkSlave', | ||
2086 | 12 | 'BuildingSlave', | 13 | 'BuildingSlave', |
2094 | 13 | 'CorruptBehavior', | 14 | 'AbortedSlave', |
2088 | 14 | 'DeadProxy', | ||
2089 | 15 | 'LostBuildingBrokenSlave', | ||
2090 | 16 | 'MockBuilder', | ||
2091 | 17 | 'OkSlave', | ||
2092 | 18 | 'SlaveTestHelpers', | ||
2093 | 19 | 'TrivialBehavior', | ||
2095 | 20 | 'WaitingSlave', | 15 | 'WaitingSlave', |
2096 | 16 | 'AbortingSlave', | ||
2097 | 21 | ] | 17 | ] |
2098 | 22 | 18 | ||
2099 | 23 | import fixtures | ||
2100 | 24 | import os | ||
2101 | 25 | |||
2102 | 26 | from StringIO import StringIO | 19 | from StringIO import StringIO |
2103 | 27 | import xmlrpclib | 20 | import xmlrpclib |
2104 | 28 | 21 | ||
2117 | 29 | from testtools.content import Content | 22 | from lp.buildmaster.interfaces.builder import CannotFetchFile |
2106 | 30 | from testtools.content_type import UTF8_TEXT | ||
2107 | 31 | |||
2108 | 32 | from twisted.internet import defer | ||
2109 | 33 | from twisted.web import xmlrpc | ||
2110 | 34 | |||
2111 | 35 | from canonical.buildd.tests.harness import BuilddSlaveTestSetup | ||
2112 | 36 | |||
2113 | 37 | from lp.buildmaster.interfaces.builder import ( | ||
2114 | 38 | CannotFetchFile, | ||
2115 | 39 | CorruptBuildCookie, | ||
2116 | 40 | ) | ||
2118 | 41 | from lp.buildmaster.model.builder import ( | 23 | from lp.buildmaster.model.builder import ( |
2119 | 42 | BuilderSlave, | ||
2120 | 43 | rescueBuilderIfLost, | 24 | rescueBuilderIfLost, |
2121 | 44 | updateBuilderStatus, | 25 | updateBuilderStatus, |
2122 | 45 | ) | 26 | ) |
2123 | @@ -78,9 +59,15 @@ | |||
2124 | 78 | slave_build_id) | 59 | slave_build_id) |
2125 | 79 | 60 | ||
2126 | 80 | def cleanSlave(self): | 61 | def cleanSlave(self): |
2127 | 62 | # XXX: This should not print anything. The print is only here to make | ||
2128 | 63 | # doc/builder.txt a meaningful test. | ||
2129 | 64 | print 'Cleaning slave' | ||
2130 | 81 | return self.slave.clean() | 65 | return self.slave.clean() |
2131 | 82 | 66 | ||
2132 | 83 | def requestAbort(self): | 67 | def requestAbort(self): |
2133 | 68 | # XXX: This should not print anything. The print is only here to make | ||
2134 | 69 | # doc/builder.txt a meaningful test. | ||
2135 | 70 | print 'Aborting slave' | ||
2136 | 84 | return self.slave.abort() | 71 | return self.slave.abort() |
2137 | 85 | 72 | ||
2138 | 86 | def resumeSlave(self, logger): | 73 | def resumeSlave(self, logger): |
2139 | @@ -90,10 +77,10 @@ | |||
2140 | 90 | pass | 77 | pass |
2141 | 91 | 78 | ||
2142 | 92 | def rescueIfLost(self, logger=None): | 79 | def rescueIfLost(self, logger=None): |
2144 | 93 | return rescueBuilderIfLost(self, logger) | 80 | rescueBuilderIfLost(self, logger) |
2145 | 94 | 81 | ||
2146 | 95 | def updateStatus(self, logger=None): | 82 | def updateStatus(self, logger=None): |
2148 | 96 | return defer.maybeDeferred(updateBuilderStatus, self, logger) | 83 | updateBuilderStatus(self, logger) |
2149 | 97 | 84 | ||
2150 | 98 | 85 | ||
2151 | 99 | # XXX: It would be *really* nice to run some set of tests against the real | 86 | # XXX: It would be *really* nice to run some set of tests against the real |
2152 | @@ -108,44 +95,36 @@ | |||
2153 | 108 | self.arch_tag = arch_tag | 95 | self.arch_tag = arch_tag |
2154 | 109 | 96 | ||
2155 | 110 | def status(self): | 97 | def status(self): |
2157 | 111 | return defer.succeed(('BuilderStatus.IDLE', '')) | 98 | return ('BuilderStatus.IDLE', '') |
2158 | 112 | 99 | ||
2159 | 113 | def ensurepresent(self, sha1, url, user=None, password=None): | 100 | def ensurepresent(self, sha1, url, user=None, password=None): |
2160 | 114 | self.call_log.append(('ensurepresent', url, user, password)) | 101 | self.call_log.append(('ensurepresent', url, user, password)) |
2162 | 115 | return defer.succeed((True, None)) | 102 | return True, None |
2163 | 116 | 103 | ||
2164 | 117 | def build(self, buildid, buildtype, chroot, filemap, args): | 104 | def build(self, buildid, buildtype, chroot, filemap, args): |
2165 | 118 | self.call_log.append( | 105 | self.call_log.append( |
2166 | 119 | ('build', buildid, buildtype, chroot, filemap.keys(), args)) | 106 | ('build', buildid, buildtype, chroot, filemap.keys(), args)) |
2167 | 120 | info = 'OkSlave BUILDING' | 107 | info = 'OkSlave BUILDING' |
2169 | 121 | return defer.succeed(('BuildStatus.Building', info)) | 108 | return ('BuildStatus.Building', info) |
2170 | 122 | 109 | ||
2171 | 123 | def echo(self, *args): | 110 | def echo(self, *args): |
2172 | 124 | self.call_log.append(('echo',) + args) | 111 | self.call_log.append(('echo',) + args) |
2174 | 125 | return defer.succeed(args) | 112 | return args |
2175 | 126 | 113 | ||
2176 | 127 | def clean(self): | 114 | def clean(self): |
2177 | 128 | self.call_log.append('clean') | 115 | self.call_log.append('clean') |
2178 | 129 | return defer.succeed(None) | ||
2179 | 130 | 116 | ||
2180 | 131 | def abort(self): | 117 | def abort(self): |
2181 | 132 | self.call_log.append('abort') | 118 | self.call_log.append('abort') |
2182 | 133 | return defer.succeed(None) | ||
2183 | 134 | 119 | ||
2184 | 135 | def info(self): | 120 | def info(self): |
2185 | 136 | self.call_log.append('info') | 121 | self.call_log.append('info') |
2191 | 137 | return defer.succeed(('1.0', self.arch_tag, 'debian')) | 122 | return ('1.0', self.arch_tag, 'debian') |
2187 | 138 | |||
2188 | 139 | def resume(self): | ||
2189 | 140 | self.call_log.append('resume') | ||
2190 | 141 | return defer.succeed(("", "", 0)) | ||
2192 | 142 | 123 | ||
2193 | 143 | def sendFileToSlave(self, sha1, url, username="", password=""): | 124 | def sendFileToSlave(self, sha1, url, username="", password=""): |
2199 | 144 | d = self.ensurepresent(sha1, url, username, password) | 125 | present, info = self.ensurepresent(sha1, url, username, password) |
2200 | 145 | def check_present((present, info)): | 126 | if not present: |
2201 | 146 | if not present: | 127 | raise CannotFetchFile(url, info) |
2197 | 147 | raise CannotFetchFile(url, info) | ||
2198 | 148 | return d.addCallback(check_present) | ||
2202 | 149 | 128 | ||
2203 | 150 | def cacheFile(self, logger, libraryfilealias): | 129 | def cacheFile(self, logger, libraryfilealias): |
2204 | 151 | return self.sendFileToSlave( | 130 | return self.sendFileToSlave( |
2205 | @@ -162,11 +141,9 @@ | |||
2206 | 162 | def status(self): | 141 | def status(self): |
2207 | 163 | self.call_log.append('status') | 142 | self.call_log.append('status') |
2208 | 164 | buildlog = xmlrpclib.Binary("This is a build log") | 143 | buildlog = xmlrpclib.Binary("This is a build log") |
2211 | 165 | return defer.succeed( | 144 | return ('BuilderStatus.BUILDING', self.build_id, buildlog) |
2210 | 166 | ('BuilderStatus.BUILDING', self.build_id, buildlog)) | ||
2212 | 167 | 145 | ||
2213 | 168 | def getFile(self, sum): | 146 | def getFile(self, sum): |
2214 | 169 | # XXX: This needs to be updated to return a Deferred. | ||
2215 | 170 | self.call_log.append('getFile') | 147 | self.call_log.append('getFile') |
2216 | 171 | if sum == "buildlog": | 148 | if sum == "buildlog": |
2217 | 172 | s = StringIO("This is a build log") | 149 | s = StringIO("This is a build log") |
2218 | @@ -178,15 +155,11 @@ | |||
2219 | 178 | """A mock slave that looks like it's currently waiting.""" | 155 | """A mock slave that looks like it's currently waiting.""" |
2220 | 179 | 156 | ||
2221 | 180 | def __init__(self, state='BuildStatus.OK', dependencies=None, | 157 | def __init__(self, state='BuildStatus.OK', dependencies=None, |
2223 | 181 | build_id='1-1', filemap=None): | 158 | build_id='1-1'): |
2224 | 182 | super(WaitingSlave, self).__init__() | 159 | super(WaitingSlave, self).__init__() |
2225 | 183 | self.state = state | 160 | self.state = state |
2226 | 184 | self.dependencies = dependencies | 161 | self.dependencies = dependencies |
2227 | 185 | self.build_id = build_id | 162 | self.build_id = build_id |
2228 | 186 | if filemap is None: | ||
2229 | 187 | self.filemap = {} | ||
2230 | 188 | else: | ||
2231 | 189 | self.filemap = filemap | ||
2232 | 190 | 163 | ||
2233 | 191 | # By default, the slave only has a buildlog, but callsites | 164 | # By default, the slave only has a buildlog, but callsites |
2234 | 192 | # can update this list as needed. | 165 | # can update this list as needed. |
2235 | @@ -194,12 +167,10 @@ | |||
2236 | 194 | 167 | ||
2237 | 195 | def status(self): | 168 | def status(self): |
2238 | 196 | self.call_log.append('status') | 169 | self.call_log.append('status') |
2242 | 197 | return defer.succeed(( | 170 | return ('BuilderStatus.WAITING', self.state, self.build_id, {}, |
2243 | 198 | 'BuilderStatus.WAITING', self.state, self.build_id, self.filemap, | 171 | self.dependencies) |
2241 | 199 | self.dependencies)) | ||
2244 | 200 | 172 | ||
2245 | 201 | def getFile(self, hash): | 173 | def getFile(self, hash): |
2246 | 202 | # XXX: This needs to be updated to return a Deferred. | ||
2247 | 203 | self.call_log.append('getFile') | 174 | self.call_log.append('getFile') |
2248 | 204 | if hash in self.valid_file_hashes: | 175 | if hash in self.valid_file_hashes: |
2249 | 205 | content = "This is a %s" % hash | 176 | content = "This is a %s" % hash |
2250 | @@ -213,19 +184,15 @@ | |||
2251 | 213 | 184 | ||
2252 | 214 | def status(self): | 185 | def status(self): |
2253 | 215 | self.call_log.append('status') | 186 | self.call_log.append('status') |
2255 | 216 | return defer.succeed(('BuilderStatus.ABORTING', '1-1')) | 187 | return ('BuilderStatus.ABORTING', '1-1') |
2256 | 217 | 188 | ||
2257 | 218 | 189 | ||
2258 | 219 | class AbortedSlave(OkSlave): | 190 | class AbortedSlave(OkSlave): |
2259 | 220 | """A mock slave that looks like it's aborted.""" | 191 | """A mock slave that looks like it's aborted.""" |
2260 | 221 | 192 | ||
2262 | 222 | def clean(self): | 193 | def status(self): |
2263 | 223 | self.call_log.append('status') | 194 | self.call_log.append('status') |
2269 | 224 | return defer.succeed(None) | 195 | return ('BuilderStatus.ABORTED', '1-1') |
2265 | 225 | |||
2266 | 226 | def status(self): | ||
2267 | 227 | self.call_log.append('clean') | ||
2268 | 228 | return defer.succeed(('BuilderStatus.ABORTED', '1-1')) | ||
2270 | 229 | 196 | ||
2271 | 230 | 197 | ||
2272 | 231 | class LostBuildingBrokenSlave: | 198 | class LostBuildingBrokenSlave: |
2273 | @@ -239,108 +206,16 @@ | |||
2274 | 239 | 206 | ||
2275 | 240 | def status(self): | 207 | def status(self): |
2276 | 241 | self.call_log.append('status') | 208 | self.call_log.append('status') |
2278 | 242 | return defer.succeed(('BuilderStatus.BUILDING', '1000-10000')) | 209 | return ('BuilderStatus.BUILDING', '1000-10000') |
2279 | 243 | 210 | ||
2280 | 244 | def abort(self): | 211 | def abort(self): |
2281 | 245 | self.call_log.append('abort') | 212 | self.call_log.append('abort') |
2283 | 246 | return defer.fail(xmlrpclib.Fault(8002, "Could not abort")) | 213 | raise xmlrpclib.Fault(8002, "Could not abort") |
2284 | 247 | 214 | ||
2285 | 248 | 215 | ||
2286 | 249 | class BrokenSlave: | 216 | class BrokenSlave: |
2287 | 250 | """A mock slave that reports that it is broken.""" | 217 | """A mock slave that reports that it is broken.""" |
2288 | 251 | 218 | ||
2289 | 252 | def __init__(self): | ||
2290 | 253 | self.call_log = [] | ||
2291 | 254 | |||
2292 | 255 | def status(self): | 219 | def status(self): |
2293 | 256 | self.call_log.append('status') | 220 | self.call_log.append('status') |
2384 | 257 | return defer.fail(xmlrpclib.Fault(8001, "Broken slave")) | 221 | raise xmlrpclib.Fault(8001, "Broken slave") |
2295 | 258 | |||
2296 | 259 | |||
2297 | 260 | class CorruptBehavior: | ||
2298 | 261 | |||
2299 | 262 | def verifySlaveBuildCookie(self, cookie): | ||
2300 | 263 | raise CorruptBuildCookie("Bad value: %r" % (cookie,)) | ||
2301 | 264 | |||
2302 | 265 | |||
2303 | 266 | class TrivialBehavior: | ||
2304 | 267 | |||
2305 | 268 | def verifySlaveBuildCookie(self, cookie): | ||
2306 | 269 | pass | ||
2307 | 270 | |||
2308 | 271 | |||
2309 | 272 | class DeadProxy(xmlrpc.Proxy): | ||
2310 | 273 | """An xmlrpc.Proxy that doesn't actually send any messages. | ||
2311 | 274 | |||
2312 | 275 | Used when you want to test timeouts, for example. | ||
2313 | 276 | """ | ||
2314 | 277 | |||
2315 | 278 | def callRemote(self, *args, **kwargs): | ||
2316 | 279 | return defer.Deferred() | ||
2317 | 280 | |||
2318 | 281 | |||
2319 | 282 | class SlaveTestHelpers(fixtures.Fixture): | ||
2320 | 283 | |||
2321 | 284 | # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`. | ||
2322 | 285 | BASE_URL = 'http://localhost:8221' | ||
2323 | 286 | TEST_URL = '%s/rpc/' % (BASE_URL,) | ||
2324 | 287 | |||
2325 | 288 | def getServerSlave(self): | ||
2326 | 289 | """Set up a test build slave server. | ||
2327 | 290 | |||
2328 | 291 | :return: A `BuilddSlaveTestSetup` object. | ||
2329 | 292 | """ | ||
2330 | 293 | tachandler = BuilddSlaveTestSetup() | ||
2331 | 294 | tachandler.setUp() | ||
2332 | 295 | # Basically impossible to do this w/ TrialTestCase. But it would be | ||
2333 | 296 | # really nice to keep it. | ||
2334 | 297 | # | ||
2335 | 298 | # def addLogFile(exc_info): | ||
2336 | 299 | # self.addDetail( | ||
2337 | 300 | # 'xmlrpc-log-file', | ||
2338 | 301 | # Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read())) | ||
2339 | 302 | # self.addOnException(addLogFile) | ||
2340 | 303 | self.addCleanup(tachandler.tearDown) | ||
2341 | 304 | return tachandler | ||
2342 | 305 | |||
2343 | 306 | def getClientSlave(self, reactor=None, proxy=None): | ||
2344 | 307 | """Return a `BuilderSlave` for use in testing. | ||
2345 | 308 | |||
2346 | 309 | Points to a fixed URL that is also used by `BuilddSlaveTestSetup`. | ||
2347 | 310 | """ | ||
2348 | 311 | return BuilderSlave.makeBuilderSlave( | ||
2349 | 312 | self.TEST_URL, 'vmhost', reactor, proxy) | ||
2350 | 313 | |||
2351 | 314 | def makeCacheFile(self, tachandler, filename): | ||
2352 | 315 | """Make a cache file available on the remote slave. | ||
2353 | 316 | |||
2354 | 317 | :param tachandler: The TacTestSetup object used to start the remote | ||
2355 | 318 | slave. | ||
2356 | 319 | :param filename: The name of the file to create in the file cache | ||
2357 | 320 | area. | ||
2358 | 321 | """ | ||
2359 | 322 | path = os.path.join(tachandler.root, 'filecache', filename) | ||
2360 | 323 | fd = open(path, 'w') | ||
2361 | 324 | fd.write('something') | ||
2362 | 325 | fd.close() | ||
2363 | 326 | self.addCleanup(os.unlink, path) | ||
2364 | 327 | |||
2365 | 328 | def triggerGoodBuild(self, slave, build_id=None): | ||
2366 | 329 | """Trigger a good build on 'slave'. | ||
2367 | 330 | |||
2368 | 331 | :param slave: A `BuilderSlave` instance to trigger the build on. | ||
2369 | 332 | :param build_id: The build identifier. If not specified, defaults to | ||
2370 | 333 | an arbitrary string. | ||
2371 | 334 | :type build_id: str | ||
2372 | 335 | :return: The build id returned by the slave. | ||
2373 | 336 | """ | ||
2374 | 337 | if build_id is None: | ||
2375 | 338 | build_id = 'random-build-id' | ||
2376 | 339 | tachandler = self.getServerSlave() | ||
2377 | 340 | chroot_file = 'fake-chroot' | ||
2378 | 341 | dsc_file = 'thing' | ||
2379 | 342 | self.makeCacheFile(tachandler, chroot_file) | ||
2380 | 343 | self.makeCacheFile(tachandler, dsc_file) | ||
2381 | 344 | return slave.build( | ||
2382 | 345 | build_id, 'debian', chroot_file, {'.dsc': dsc_file}, | ||
2383 | 346 | {'ogrecomponent': 'main'}) | ||
2385 | 347 | 222 | ||
2386 | === modified file 'lib/lp/buildmaster/tests/test_builder.py' | |||
2387 | --- lib/lp/buildmaster/tests/test_builder.py 2010-10-18 16:44:22 +0000 | |||
2388 | +++ lib/lp/buildmaster/tests/test_builder.py 2010-12-07 16:24:04 +0000 | |||
2389 | @@ -3,24 +3,20 @@ | |||
2390 | 3 | 3 | ||
2391 | 4 | """Test Builder features.""" | 4 | """Test Builder features.""" |
2392 | 5 | 5 | ||
2393 | 6 | import errno | ||
2394 | 6 | import os | 7 | import os |
2396 | 7 | import signal | 8 | import socket |
2397 | 8 | import xmlrpclib | 9 | import xmlrpclib |
2398 | 9 | 10 | ||
2405 | 10 | from twisted.web.client import getPage | 11 | from testtools.content import Content |
2406 | 11 | 12 | from testtools.content_type import UTF8_TEXT | |
2401 | 12 | from twisted.internet.defer import CancelledError | ||
2402 | 13 | from twisted.internet.task import Clock | ||
2403 | 14 | from twisted.python.failure import Failure | ||
2404 | 15 | from twisted.trial.unittest import TestCase as TrialTestCase | ||
2407 | 16 | 13 | ||
2408 | 17 | from zope.component import getUtility | 14 | from zope.component import getUtility |
2409 | 18 | from zope.security.proxy import removeSecurityProxy | 15 | from zope.security.proxy import removeSecurityProxy |
2410 | 19 | 16 | ||
2411 | 20 | from canonical.buildd.slave import BuilderStatus | 17 | from canonical.buildd.slave import BuilderStatus |
2413 | 21 | from canonical.config import config | 18 | from canonical.buildd.tests.harness import BuilddSlaveTestSetup |
2414 | 22 | from canonical.database.sqlbase import flush_database_updates | 19 | from canonical.database.sqlbase import flush_database_updates |
2415 | 23 | from canonical.launchpad.scripts import QuietFakeLogger | ||
2416 | 24 | from canonical.launchpad.webapp.interfaces import ( | 20 | from canonical.launchpad.webapp.interfaces import ( |
2417 | 25 | DEFAULT_FLAVOR, | 21 | DEFAULT_FLAVOR, |
2418 | 26 | IStoreSelector, | 22 | IStoreSelector, |
2419 | @@ -28,38 +24,21 @@ | |||
2420 | 28 | ) | 24 | ) |
2421 | 29 | from canonical.testing.layers import ( | 25 | from canonical.testing.layers import ( |
2422 | 30 | DatabaseFunctionalLayer, | 26 | DatabaseFunctionalLayer, |
2426 | 31 | LaunchpadZopelessLayer, | 27 | LaunchpadZopelessLayer |
2424 | 32 | TwistedLaunchpadZopelessLayer, | ||
2425 | 33 | TwistedLayer, | ||
2427 | 34 | ) | 28 | ) |
2428 | 35 | from lp.buildmaster.enums import BuildStatus | 29 | from lp.buildmaster.enums import BuildStatus |
2434 | 36 | from lp.buildmaster.interfaces.builder import ( | 30 | from lp.buildmaster.interfaces.builder import IBuilder, IBuilderSet |
2430 | 37 | CannotFetchFile, | ||
2431 | 38 | IBuilder, | ||
2432 | 39 | IBuilderSet, | ||
2433 | 40 | ) | ||
2435 | 41 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( | 31 | from lp.buildmaster.interfaces.buildfarmjobbehavior import ( |
2436 | 42 | IBuildFarmJobBehavior, | 32 | IBuildFarmJobBehavior, |
2437 | 43 | ) | 33 | ) |
2438 | 44 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet | 34 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet |
2440 | 45 | from lp.buildmaster.interfaces.builder import CannotResumeHost | 35 | from lp.buildmaster.model.builder import BuilderSlave |
2441 | 46 | from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior | 36 | from lp.buildmaster.model.buildfarmjobbehavior import IdleBuildBehavior |
2442 | 47 | from lp.buildmaster.model.buildqueue import BuildQueue | 37 | from lp.buildmaster.model.buildqueue import BuildQueue |
2443 | 48 | from lp.buildmaster.tests.mock_slaves import ( | 38 | from lp.buildmaster.tests.mock_slaves import ( |
2444 | 49 | AbortedSlave, | 39 | AbortedSlave, |
2445 | 50 | AbortingSlave, | ||
2446 | 51 | BrokenSlave, | ||
2447 | 52 | BuildingSlave, | ||
2448 | 53 | CorruptBehavior, | ||
2449 | 54 | DeadProxy, | ||
2450 | 55 | LostBuildingBrokenSlave, | ||
2451 | 56 | MockBuilder, | 40 | MockBuilder, |
2452 | 57 | OkSlave, | ||
2453 | 58 | SlaveTestHelpers, | ||
2454 | 59 | TrivialBehavior, | ||
2455 | 60 | WaitingSlave, | ||
2456 | 61 | ) | 41 | ) |
2457 | 62 | from lp.services.job.interfaces.job import JobStatus | ||
2458 | 63 | from lp.soyuz.enums import ( | 42 | from lp.soyuz.enums import ( |
2459 | 64 | ArchivePurpose, | 43 | ArchivePurpose, |
2460 | 65 | PackagePublishingStatus, | 44 | PackagePublishingStatus, |
2461 | @@ -70,12 +49,9 @@ | |||
2462 | 70 | ) | 49 | ) |
2463 | 71 | from lp.soyuz.tests.test_publishing import SoyuzTestPublisher | 50 | from lp.soyuz.tests.test_publishing import SoyuzTestPublisher |
2464 | 72 | from lp.testing import ( | 51 | from lp.testing import ( |
2468 | 73 | ANONYMOUS, | 52 | TestCase, |
2466 | 74 | login_as, | ||
2467 | 75 | logout, | ||
2469 | 76 | TestCaseWithFactory, | 53 | TestCaseWithFactory, |
2470 | 77 | ) | 54 | ) |
2471 | 78 | from lp.testing.factory import LaunchpadObjectFactory | ||
2472 | 79 | from lp.testing.fakemethod import FakeMethod | 55 | from lp.testing.fakemethod import FakeMethod |
2473 | 80 | 56 | ||
2474 | 81 | 57 | ||
2475 | @@ -116,121 +92,42 @@ | |||
2476 | 116 | bq = builder.getBuildQueue() | 92 | bq = builder.getBuildQueue() |
2477 | 117 | self.assertIs(None, bq) | 93 | self.assertIs(None, bq) |
2478 | 118 | 94 | ||
2594 | 119 | 95 | def test_updateBuilderStatus_catches_repeated_EINTR(self): | |
2595 | 120 | class TestBuilderWithTrial(TrialTestCase): | 96 | # A single EINTR return from a socket operation should cause the |
2596 | 121 | 97 | # operation to be retried, not fail/reset the builder. | |
2597 | 122 | layer = TwistedLaunchpadZopelessLayer | 98 | builder = removeSecurityProxy(self.factory.makeBuilder()) |
2598 | 123 | 99 | builder.handleTimeout = FakeMethod() | |
2599 | 124 | def setUp(self): | 100 | builder.rescueIfLost = FakeMethod() |
2600 | 125 | super(TestBuilderWithTrial, self) | 101 | |
2601 | 126 | self.slave_helper = SlaveTestHelpers() | 102 | def _fake_checkSlaveAlive(): |
2602 | 127 | self.slave_helper.setUp() | 103 | # Raise an EINTR error for all invocations. |
2603 | 128 | self.addCleanup(self.slave_helper.cleanUp) | 104 | raise socket.error(errno.EINTR, "fake eintr") |
2604 | 129 | self.factory = LaunchpadObjectFactory() | 105 | |
2605 | 130 | login_as(ANONYMOUS) | 106 | builder.checkSlaveAlive = _fake_checkSlaveAlive |
2606 | 131 | self.addCleanup(logout) | 107 | builder.updateStatus() |
2607 | 132 | 108 | ||
2608 | 133 | def test_updateStatus_aborts_lost_and_broken_slave(self): | 109 | # builder.updateStatus should eventually have called |
2609 | 134 | # A slave that's 'lost' should be aborted; when the slave is | 110 | # handleTimeout() |
2610 | 135 | # broken then abort() should also throw a fault. | 111 | self.assertEqual(1, builder.handleTimeout.call_count) |
2611 | 136 | slave = LostBuildingBrokenSlave() | 112 | |
2612 | 137 | lostbuilding_builder = MockBuilder( | 113 | def test_updateBuilderStatus_catches_single_EINTR(self): |
2613 | 138 | 'Lost Building Broken Slave', slave, behavior=CorruptBehavior()) | 114 | builder = removeSecurityProxy(self.factory.makeBuilder()) |
2614 | 139 | d = lostbuilding_builder.updateStatus(QuietFakeLogger()) | 115 | builder.handleTimeout = FakeMethod() |
2615 | 140 | def check_slave_status(failure): | 116 | builder.rescueIfLost = FakeMethod() |
2616 | 141 | self.assertIn('abort', slave.call_log) | 117 | self.eintr_returned = False |
2617 | 142 | # 'Fault' comes from the LostBuildingBrokenSlave, this is | 118 | |
2618 | 143 | # just testing that the value is passed through. | 119 | def _fake_checkSlaveAlive(): |
2619 | 144 | self.assertIsInstance(failure.value, xmlrpclib.Fault) | 120 | # raise an EINTR error for the first invocation only. |
2620 | 145 | return d.addBoth(check_slave_status) | 121 | if not self.eintr_returned: |
2621 | 146 | 122 | self.eintr_returned = True | |
2622 | 147 | def test_resumeSlaveHost_nonvirtual(self): | 123 | raise socket.error(errno.EINTR, "fake eintr") |
2623 | 148 | builder = self.factory.makeBuilder(virtualized=False) | 124 | |
2624 | 149 | d = builder.resumeSlaveHost() | 125 | builder.checkSlaveAlive = _fake_checkSlaveAlive |
2625 | 150 | return self.assertFailure(d, CannotResumeHost) | 126 | builder.updateStatus() |
2626 | 151 | 127 | ||
2627 | 152 | def test_resumeSlaveHost_no_vmhost(self): | 128 | # builder.updateStatus should never call handleTimeout() for a |
2628 | 153 | builder = self.factory.makeBuilder(virtualized=True, vm_host=None) | 129 | # single EINTR. |
2629 | 154 | d = builder.resumeSlaveHost() | 130 | self.assertEqual(0, builder.handleTimeout.call_count) |
2515 | 155 | return self.assertFailure(d, CannotResumeHost) | ||
2516 | 156 | |||
2517 | 157 | def test_resumeSlaveHost_success(self): | ||
2518 | 158 | reset_config = """ | ||
2519 | 159 | [builddmaster] | ||
2520 | 160 | vm_resume_command: /bin/echo -n parp""" | ||
2521 | 161 | config.push('reset', reset_config) | ||
2522 | 162 | self.addCleanup(config.pop, 'reset') | ||
2523 | 163 | |||
2524 | 164 | builder = self.factory.makeBuilder(virtualized=True, vm_host="pop") | ||
2525 | 165 | d = builder.resumeSlaveHost() | ||
2526 | 166 | def got_resume(output): | ||
2527 | 167 | self.assertEqual(('parp', ''), output) | ||
2528 | 168 | return d.addCallback(got_resume) | ||
2529 | 169 | |||
2530 | 170 | def test_resumeSlaveHost_command_failed(self): | ||
2531 | 171 | reset_fail_config = """ | ||
2532 | 172 | [builddmaster] | ||
2533 | 173 | vm_resume_command: /bin/false""" | ||
2534 | 174 | config.push('reset fail', reset_fail_config) | ||
2535 | 175 | self.addCleanup(config.pop, 'reset fail') | ||
2536 | 176 | builder = self.factory.makeBuilder(virtualized=True, vm_host="pop") | ||
2537 | 177 | d = builder.resumeSlaveHost() | ||
2538 | 178 | return self.assertFailure(d, CannotResumeHost) | ||
2539 | 179 | |||
2540 | 180 | def test_handleTimeout_resume_failure(self): | ||
2541 | 181 | reset_fail_config = """ | ||
2542 | 182 | [builddmaster] | ||
2543 | 183 | vm_resume_command: /bin/false""" | ||
2544 | 184 | config.push('reset fail', reset_fail_config) | ||
2545 | 185 | self.addCleanup(config.pop, 'reset fail') | ||
2546 | 186 | builder = self.factory.makeBuilder(virtualized=True, vm_host="pop") | ||
2547 | 187 | builder.builderok = True | ||
2548 | 188 | d = builder.handleTimeout(QuietFakeLogger(), 'blah') | ||
2549 | 189 | return self.assertFailure(d, CannotResumeHost) | ||
2550 | 190 | |||
2551 | 191 | def _setupRecipeBuildAndBuilder(self): | ||
2552 | 192 | # Helper function to make a builder capable of building a | ||
2553 | 193 | # recipe, returning both. | ||
2554 | 194 | processor = self.factory.makeProcessor(name="i386") | ||
2555 | 195 | builder = self.factory.makeBuilder( | ||
2556 | 196 | processor=processor, virtualized=True, vm_host="bladh") | ||
2557 | 197 | builder.setSlaveForTesting(OkSlave()) | ||
2558 | 198 | distroseries = self.factory.makeDistroSeries() | ||
2559 | 199 | das = self.factory.makeDistroArchSeries( | ||
2560 | 200 | distroseries=distroseries, architecturetag="i386", | ||
2561 | 201 | processorfamily=processor.family) | ||
2562 | 202 | chroot = self.factory.makeLibraryFileAlias() | ||
2563 | 203 | das.addOrUpdateChroot(chroot) | ||
2564 | 204 | distroseries.nominatedarchindep = das | ||
2565 | 205 | build = self.factory.makeSourcePackageRecipeBuild( | ||
2566 | 206 | distroseries=distroseries) | ||
2567 | 207 | return builder, build | ||
2568 | 208 | |||
2569 | 209 | def test_findAndStartJob_returns_candidate(self): | ||
2570 | 210 | # findAndStartJob finds the next queued job using _findBuildCandidate. | ||
2571 | 211 | # We don't care about the type of build at all. | ||
2572 | 212 | builder, build = self._setupRecipeBuildAndBuilder() | ||
2573 | 213 | candidate = build.queueBuild() | ||
2574 | 214 | # _findBuildCandidate is tested elsewhere, we just make sure that | ||
2575 | 215 | # findAndStartJob delegates to it. | ||
2576 | 216 | removeSecurityProxy(builder)._findBuildCandidate = FakeMethod( | ||
2577 | 217 | result=candidate) | ||
2578 | 218 | d = builder.findAndStartJob() | ||
2579 | 219 | return d.addCallback(self.assertEqual, candidate) | ||
2580 | 220 | |||
2581 | 221 | def test_findAndStartJob_starts_job(self): | ||
2582 | 222 | # findAndStartJob finds the next queued job using _findBuildCandidate | ||
2583 | 223 | # and then starts it. | ||
2584 | 224 | # We don't care about the type of build at all. | ||
2585 | 225 | builder, build = self._setupRecipeBuildAndBuilder() | ||
2586 | 226 | candidate = build.queueBuild() | ||
2587 | 227 | removeSecurityProxy(builder)._findBuildCandidate = FakeMethod( | ||
2588 | 228 | result=candidate) | ||
2589 | 229 | d = builder.findAndStartJob() | ||
2590 | 230 | def check_build_started(candidate): | ||
2591 | 231 | self.assertEqual(candidate.builder, builder) | ||
2592 | 232 | self.assertEqual(BuildStatus.BUILDING, build.status) | ||
2593 | 233 | return d.addCallback(check_build_started) | ||
2630 | 234 | 131 | ||
2631 | 235 | def test_slave(self): | 132 | def test_slave(self): |
2632 | 236 | # Builder.slave is a BuilderSlave that points at the actual Builder. | 133 | # Builder.slave is a BuilderSlave that points at the actual Builder. |
2633 | @@ -239,147 +136,25 @@ | |||
2634 | 239 | builder = removeSecurityProxy(self.factory.makeBuilder()) | 136 | builder = removeSecurityProxy(self.factory.makeBuilder()) |
2635 | 240 | self.assertEqual(builder.url, builder.slave.url) | 137 | self.assertEqual(builder.url, builder.slave.url) |
2636 | 241 | 138 | ||
2637 | 139 | |||
2638 | 140 | class Test_rescueBuilderIfLost(TestCaseWithFactory): | ||
2639 | 141 | """Tests for lp.buildmaster.model.builder.rescueBuilderIfLost.""" | ||
2640 | 142 | |||
2641 | 143 | layer = LaunchpadZopelessLayer | ||
2642 | 144 | |||
2643 | 242 | def test_recovery_of_aborted_slave(self): | 145 | def test_recovery_of_aborted_slave(self): |
2644 | 243 | # If a slave is in the ABORTED state, rescueBuilderIfLost should | 146 | # If a slave is in the ABORTED state, rescueBuilderIfLost should |
2645 | 244 | # clean it if we don't think it's currently building anything. | 147 | # clean it if we don't think it's currently building anything. |
2646 | 245 | # See bug 463046. | 148 | # See bug 463046. |
2647 | 246 | aborted_slave = AbortedSlave() | 149 | aborted_slave = AbortedSlave() |
2648 | 150 | # The slave's clean() method is normally an XMLRPC call, so we | ||
2649 | 151 | # can just stub it out and check that it got called. | ||
2650 | 152 | aborted_slave.clean = FakeMethod() | ||
2651 | 247 | builder = MockBuilder("mock_builder", aborted_slave) | 153 | builder = MockBuilder("mock_builder", aborted_slave) |
2652 | 248 | builder.currentjob = None | 154 | builder.currentjob = None |
2787 | 249 | d = builder.rescueIfLost() | 155 | builder.rescueIfLost() |
2788 | 250 | def check_slave_calls(ignored): | 156 | |
2789 | 251 | self.assertIn('clean', aborted_slave.call_log) | 157 | self.assertEqual(1, aborted_slave.clean.call_count) |
2656 | 252 | return d.addCallback(check_slave_calls) | ||
2657 | 253 | |||
2658 | 254 | def test_recover_ok_slave(self): | ||
2659 | 255 | # An idle slave is not rescued. | ||
2660 | 256 | slave = OkSlave() | ||
2661 | 257 | builder = MockBuilder("mock_builder", slave, TrivialBehavior()) | ||
2662 | 258 | d = builder.rescueIfLost() | ||
2663 | 259 | def check_slave_calls(ignored): | ||
2664 | 260 | self.assertNotIn('abort', slave.call_log) | ||
2665 | 261 | self.assertNotIn('clean', slave.call_log) | ||
2666 | 262 | return d.addCallback(check_slave_calls) | ||
2667 | 263 | |||
2668 | 264 | def test_recover_waiting_slave_with_good_id(self): | ||
2669 | 265 | # rescueIfLost does not attempt to abort or clean a builder that is | ||
2670 | 266 | # WAITING. | ||
2671 | 267 | waiting_slave = WaitingSlave() | ||
2672 | 268 | builder = MockBuilder("mock_builder", waiting_slave, TrivialBehavior()) | ||
2673 | 269 | d = builder.rescueIfLost() | ||
2674 | 270 | def check_slave_calls(ignored): | ||
2675 | 271 | self.assertNotIn('abort', waiting_slave.call_log) | ||
2676 | 272 | self.assertNotIn('clean', waiting_slave.call_log) | ||
2677 | 273 | return d.addCallback(check_slave_calls) | ||
2678 | 274 | |||
2679 | 275 | def test_recover_waiting_slave_with_bad_id(self): | ||
2680 | 276 | # If a slave is WAITING with a build for us to get, and the build | ||
2681 | 277 | # cookie cannot be verified, which means we don't recognize the build, | ||
2682 | 278 | # then rescueBuilderIfLost should attempt to abort it, so that the | ||
2683 | 279 | # builder is reset for a new build, and the corrupt build is | ||
2684 | 280 | # discarded. | ||
2685 | 281 | waiting_slave = WaitingSlave() | ||
2686 | 282 | builder = MockBuilder("mock_builder", waiting_slave, CorruptBehavior()) | ||
2687 | 283 | d = builder.rescueIfLost() | ||
2688 | 284 | def check_slave_calls(ignored): | ||
2689 | 285 | self.assertNotIn('abort', waiting_slave.call_log) | ||
2690 | 286 | self.assertIn('clean', waiting_slave.call_log) | ||
2691 | 287 | return d.addCallback(check_slave_calls) | ||
2692 | 288 | |||
2693 | 289 | def test_recover_building_slave_with_good_id(self): | ||
2694 | 290 | # rescueIfLost does not attempt to abort or clean a builder that is | ||
2695 | 291 | # BUILDING. | ||
2696 | 292 | building_slave = BuildingSlave() | ||
2697 | 293 | builder = MockBuilder("mock_builder", building_slave, TrivialBehavior()) | ||
2698 | 294 | d = builder.rescueIfLost() | ||
2699 | 295 | def check_slave_calls(ignored): | ||
2700 | 296 | self.assertNotIn('abort', building_slave.call_log) | ||
2701 | 297 | self.assertNotIn('clean', building_slave.call_log) | ||
2702 | 298 | return d.addCallback(check_slave_calls) | ||
2703 | 299 | |||
2704 | 300 | def test_recover_building_slave_with_bad_id(self): | ||
2705 | 301 | # If a slave is BUILDING with a build id we don't recognize, then we | ||
2706 | 302 | # abort the build, thus stopping it in its tracks. | ||
2707 | 303 | building_slave = BuildingSlave() | ||
2708 | 304 | builder = MockBuilder("mock_builder", building_slave, CorruptBehavior()) | ||
2709 | 305 | d = builder.rescueIfLost() | ||
2710 | 306 | def check_slave_calls(ignored): | ||
2711 | 307 | self.assertIn('abort', building_slave.call_log) | ||
2712 | 308 | self.assertNotIn('clean', building_slave.call_log) | ||
2713 | 309 | return d.addCallback(check_slave_calls) | ||
2714 | 310 | |||
2715 | 311 | |||
2716 | 312 | class TestBuilderSlaveStatus(TestBuilderWithTrial): | ||
2717 | 313 | |||
2718 | 314 | # Verify what IBuilder.slaveStatus returns with slaves in different | ||
2719 | 315 | # states. | ||
2720 | 316 | |||
2721 | 317 | def assertStatus(self, slave, builder_status=None, | ||
2722 | 318 | build_status=None, logtail=False, filemap=None, | ||
2723 | 319 | dependencies=None): | ||
2724 | 320 | builder = self.factory.makeBuilder() | ||
2725 | 321 | builder.setSlaveForTesting(slave) | ||
2726 | 322 | d = builder.slaveStatus() | ||
2727 | 323 | |||
2728 | 324 | def got_status(status_dict): | ||
2729 | 325 | expected = {} | ||
2730 | 326 | if builder_status is not None: | ||
2731 | 327 | expected["builder_status"] = builder_status | ||
2732 | 328 | if build_status is not None: | ||
2733 | 329 | expected["build_status"] = build_status | ||
2734 | 330 | if dependencies is not None: | ||
2735 | 331 | expected["dependencies"] = dependencies | ||
2736 | 332 | |||
2737 | 333 | # We don't care so much about the content of the logtail, | ||
2738 | 334 | # just that it's there. | ||
2739 | 335 | if logtail: | ||
2740 | 336 | tail = status_dict.pop("logtail") | ||
2741 | 337 | self.assertIsInstance(tail, xmlrpclib.Binary) | ||
2742 | 338 | |||
2743 | 339 | self.assertEqual(expected, status_dict) | ||
2744 | 340 | |||
2745 | 341 | return d.addCallback(got_status) | ||
2746 | 342 | |||
2747 | 343 | def test_slaveStatus_idle_slave(self): | ||
2748 | 344 | self.assertStatus( | ||
2749 | 345 | OkSlave(), builder_status='BuilderStatus.IDLE') | ||
2750 | 346 | |||
2751 | 347 | def test_slaveStatus_building_slave(self): | ||
2752 | 348 | self.assertStatus( | ||
2753 | 349 | BuildingSlave(), builder_status='BuilderStatus.BUILDING', | ||
2754 | 350 | logtail=True) | ||
2755 | 351 | |||
2756 | 352 | def test_slaveStatus_waiting_slave(self): | ||
2757 | 353 | self.assertStatus( | ||
2758 | 354 | WaitingSlave(), builder_status='BuilderStatus.WAITING', | ||
2759 | 355 | build_status='BuildStatus.OK', filemap={}) | ||
2760 | 356 | |||
2761 | 357 | def test_slaveStatus_aborting_slave(self): | ||
2762 | 358 | self.assertStatus( | ||
2763 | 359 | AbortingSlave(), builder_status='BuilderStatus.ABORTING') | ||
2764 | 360 | |||
2765 | 361 | def test_slaveStatus_aborted_slave(self): | ||
2766 | 362 | self.assertStatus( | ||
2767 | 363 | AbortedSlave(), builder_status='BuilderStatus.ABORTED') | ||
2768 | 364 | |||
2769 | 365 | def test_isAvailable_with_not_builderok(self): | ||
2770 | 366 | # isAvailable() is a wrapper around slaveStatusSentence() | ||
2771 | 367 | builder = self.factory.makeBuilder() | ||
2772 | 368 | builder.builderok = False | ||
2773 | 369 | d = builder.isAvailable() | ||
2774 | 370 | return d.addCallback(self.assertFalse) | ||
2775 | 371 | |||
2776 | 372 | def test_isAvailable_with_slave_fault(self): | ||
2777 | 373 | builder = self.factory.makeBuilder() | ||
2778 | 374 | builder.setSlaveForTesting(BrokenSlave()) | ||
2779 | 375 | d = builder.isAvailable() | ||
2780 | 376 | return d.addCallback(self.assertFalse) | ||
2781 | 377 | |||
2782 | 378 | def test_isAvailable_with_slave_idle(self): | ||
2783 | 379 | builder = self.factory.makeBuilder() | ||
2784 | 380 | builder.setSlaveForTesting(OkSlave()) | ||
2785 | 381 | d = builder.isAvailable() | ||
2786 | 382 | return d.addCallback(self.assertTrue) | ||
2790 | 383 | 158 | ||
2791 | 384 | 159 | ||
2792 | 385 | class TestFindBuildCandidateBase(TestCaseWithFactory): | 160 | class TestFindBuildCandidateBase(TestCaseWithFactory): |
2793 | @@ -413,49 +188,6 @@ | |||
2794 | 413 | builder.manual = False | 188 | builder.manual = False |
2795 | 414 | 189 | ||
2796 | 415 | 190 | ||
2797 | 416 | class TestFindBuildCandidateGeneralCases(TestFindBuildCandidateBase): | ||
2798 | 417 | # Test usage of findBuildCandidate not specific to any archive type. | ||
2799 | 418 | |||
2800 | 419 | def test_findBuildCandidate_supersedes_builds(self): | ||
2801 | 420 | # IBuilder._findBuildCandidate identifies if there are builds | ||
2802 | 421 | # for superseded source package releases in the queue and marks | ||
2803 | 422 | # the corresponding build record as SUPERSEDED. | ||
2804 | 423 | archive = self.factory.makeArchive() | ||
2805 | 424 | self.publisher.getPubSource( | ||
2806 | 425 | sourcename="gedit", status=PackagePublishingStatus.PUBLISHED, | ||
2807 | 426 | archive=archive).createMissingBuilds() | ||
2808 | 427 | old_candidate = removeSecurityProxy( | ||
2809 | 428 | self.frog_builder)._findBuildCandidate() | ||
2810 | 429 | |||
2811 | 430 | # The candidate starts off as NEEDSBUILD: | ||
2812 | 431 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry( | ||
2813 | 432 | old_candidate) | ||
2814 | 433 | self.assertEqual(BuildStatus.NEEDSBUILD, build.status) | ||
2815 | 434 | |||
2816 | 435 | # Now supersede the source package: | ||
2817 | 436 | publication = build.current_source_publication | ||
2818 | 437 | publication.status = PackagePublishingStatus.SUPERSEDED | ||
2819 | 438 | |||
2820 | 439 | # The candidate returned is now a different one: | ||
2821 | 440 | new_candidate = removeSecurityProxy( | ||
2822 | 441 | self.frog_builder)._findBuildCandidate() | ||
2823 | 442 | self.assertNotEqual(new_candidate, old_candidate) | ||
2824 | 443 | |||
2825 | 444 | # And the old_candidate is superseded: | ||
2826 | 445 | self.assertEqual(BuildStatus.SUPERSEDED, build.status) | ||
2827 | 446 | |||
2828 | 447 | def test_acquireBuildCandidate_marks_building(self): | ||
2829 | 448 | # acquireBuildCandidate() should call _findBuildCandidate and | ||
2830 | 449 | # mark the build as building. | ||
2831 | 450 | archive = self.factory.makeArchive() | ||
2832 | 451 | self.publisher.getPubSource( | ||
2833 | 452 | sourcename="gedit", status=PackagePublishingStatus.PUBLISHED, | ||
2834 | 453 | archive=archive).createMissingBuilds() | ||
2835 | 454 | candidate = removeSecurityProxy( | ||
2836 | 455 | self.frog_builder).acquireBuildCandidate() | ||
2837 | 456 | self.assertEqual(JobStatus.RUNNING, candidate.job.status) | ||
2838 | 457 | |||
2839 | 458 | |||
2840 | 459 | class TestFindBuildCandidatePPAWithSingleBuilder(TestCaseWithFactory): | 191 | class TestFindBuildCandidatePPAWithSingleBuilder(TestCaseWithFactory): |
2841 | 460 | 192 | ||
2842 | 461 | layer = LaunchpadZopelessLayer | 193 | layer = LaunchpadZopelessLayer |
2843 | @@ -588,16 +320,6 @@ | |||
2844 | 588 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job) | 320 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job) |
2845 | 589 | self.failUnlessEqual('joesppa', build.archive.name) | 321 | self.failUnlessEqual('joesppa', build.archive.name) |
2846 | 590 | 322 | ||
2847 | 591 | def test_findBuildCandidate_with_disabled_archive(self): | ||
2848 | 592 | # Disabled archives should not be considered for dispatching | ||
2849 | 593 | # builds. | ||
2850 | 594 | disabled_job = removeSecurityProxy(self.builder4)._findBuildCandidate() | ||
2851 | 595 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry( | ||
2852 | 596 | disabled_job) | ||
2853 | 597 | build.archive.disable() | ||
2854 | 598 | next_job = removeSecurityProxy(self.builder4)._findBuildCandidate() | ||
2855 | 599 | self.assertNotEqual(disabled_job, next_job) | ||
2856 | 600 | |||
2857 | 601 | 323 | ||
2858 | 602 | class TestFindBuildCandidatePrivatePPA(TestFindBuildCandidatePPABase): | 324 | class TestFindBuildCandidatePrivatePPA(TestFindBuildCandidatePPABase): |
2859 | 603 | 325 | ||
2860 | @@ -610,14 +332,6 @@ | |||
2861 | 610 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job) | 332 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(next_job) |
2862 | 611 | self.failUnlessEqual('joesppa', build.archive.name) | 333 | self.failUnlessEqual('joesppa', build.archive.name) |
2863 | 612 | 334 | ||
2864 | 613 | # If the source for the build is still pending, it won't be | ||
2865 | 614 | # dispatched because the builder has to fetch the source files | ||
2866 | 615 | # from the (password protected) repo area, not the librarian. | ||
2867 | 616 | pub = build.current_source_publication | ||
2868 | 617 | pub.status = PackagePublishingStatus.PENDING | ||
2869 | 618 | candidate = removeSecurityProxy(self.builder4)._findBuildCandidate() | ||
2870 | 619 | self.assertNotEqual(next_job.id, candidate.id) | ||
2871 | 620 | |||
2872 | 621 | 335 | ||
2873 | 622 | class TestFindBuildCandidateDistroArchive(TestFindBuildCandidateBase): | 336 | class TestFindBuildCandidateDistroArchive(TestFindBuildCandidateBase): |
2874 | 623 | 337 | ||
2875 | @@ -760,48 +474,97 @@ | |||
2876 | 760 | self.builder.current_build_behavior, BinaryPackageBuildBehavior) | 474 | self.builder.current_build_behavior, BinaryPackageBuildBehavior) |
2877 | 761 | 475 | ||
2878 | 762 | 476 | ||
2880 | 763 | class TestSlave(TrialTestCase): | 477 | class TestSlave(TestCase): |
2881 | 764 | """ | 478 | """ |
2882 | 765 | Integration tests for BuilderSlave that verify how it works against a | 479 | Integration tests for BuilderSlave that verify how it works against a |
2883 | 766 | real slave server. | 480 | real slave server. |
2884 | 767 | """ | 481 | """ |
2885 | 768 | 482 | ||
2886 | 769 | layer = TwistedLayer | ||
2887 | 770 | |||
2888 | 771 | def setUp(self): | ||
2889 | 772 | super(TestSlave, self).setUp() | ||
2890 | 773 | self.slave_helper = SlaveTestHelpers() | ||
2891 | 774 | self.slave_helper.setUp() | ||
2892 | 775 | self.addCleanup(self.slave_helper.cleanUp) | ||
2893 | 776 | |||
2894 | 777 | # XXX: JonathanLange 2010-09-20 bug=643521: There are also tests for | 483 | # XXX: JonathanLange 2010-09-20 bug=643521: There are also tests for |
2895 | 778 | # BuilderSlave in buildd-slave.txt and in other places. The tests here | 484 | # BuilderSlave in buildd-slave.txt and in other places. The tests here |
2896 | 779 | # ought to become the canonical tests for BuilderSlave vs running buildd | 485 | # ought to become the canonical tests for BuilderSlave vs running buildd |
2897 | 780 | # XML-RPC server interaction. | 486 | # XML-RPC server interaction. |
2898 | 781 | 487 | ||
2899 | 488 | # The URL for the XML-RPC service set up by `BuilddSlaveTestSetup`. | ||
2900 | 489 | TEST_URL = 'http://localhost:8221/rpc/' | ||
2901 | 490 | |||
2902 | 491 | def getServerSlave(self): | ||
2903 | 492 | """Set up a test build slave server. | ||
2904 | 493 | |||
2905 | 494 | :return: A `BuilddSlaveTestSetup` object. | ||
2906 | 495 | """ | ||
2907 | 496 | tachandler = BuilddSlaveTestSetup() | ||
2908 | 497 | tachandler.setUp() | ||
2909 | 498 | self.addCleanup(tachandler.tearDown) | ||
2910 | 499 | def addLogFile(exc_info): | ||
2911 | 500 | self.addDetail( | ||
2912 | 501 | 'xmlrpc-log-file', | ||
2913 | 502 | Content(UTF8_TEXT, lambda: open(tachandler.logfile, 'r').read())) | ||
2914 | 503 | self.addOnException(addLogFile) | ||
2915 | 504 | return tachandler | ||
2916 | 505 | |||
2917 | 506 | def getClientSlave(self): | ||
2918 | 507 | """Return a `BuilderSlave` for use in testing. | ||
2919 | 508 | |||
2920 | 509 | Points to a fixed URL that is also used by `BuilddSlaveTestSetup`. | ||
2921 | 510 | """ | ||
2922 | 511 | return BuilderSlave.makeBlockingSlave(self.TEST_URL, 'vmhost') | ||
2923 | 512 | |||
2924 | 513 | def makeCacheFile(self, tachandler, filename): | ||
2925 | 514 | """Make a cache file available on the remote slave. | ||
2926 | 515 | |||
2927 | 516 | :param tachandler: The TacTestSetup object used to start the remote | ||
2928 | 517 | slave. | ||
2929 | 518 | :param filename: The name of the file to create in the file cache | ||
2930 | 519 | area. | ||
2931 | 520 | """ | ||
2932 | 521 | path = os.path.join(tachandler.root, 'filecache', filename) | ||
2933 | 522 | fd = open(path, 'w') | ||
2934 | 523 | fd.write('something') | ||
2935 | 524 | fd.close() | ||
2936 | 525 | self.addCleanup(os.unlink, path) | ||
2937 | 526 | |||
2938 | 527 | def triggerGoodBuild(self, slave, build_id=None): | ||
2939 | 528 | """Trigger a good build on 'slave'. | ||
2940 | 529 | |||
2941 | 530 | :param slave: A `BuilderSlave` instance to trigger the build on. | ||
2942 | 531 | :param build_id: The build identifier. If not specified, defaults to | ||
2943 | 532 | an arbitrary string. | ||
2944 | 533 | :type build_id: str | ||
2945 | 534 | :return: The build id returned by the slave. | ||
2946 | 535 | """ | ||
2947 | 536 | if build_id is None: | ||
2948 | 537 | build_id = self.getUniqueString() | ||
2949 | 538 | tachandler = self.getServerSlave() | ||
2950 | 539 | chroot_file = 'fake-chroot' | ||
2951 | 540 | dsc_file = 'thing' | ||
2952 | 541 | self.makeCacheFile(tachandler, chroot_file) | ||
2953 | 542 | self.makeCacheFile(tachandler, dsc_file) | ||
2954 | 543 | return slave.build( | ||
2955 | 544 | build_id, 'debian', chroot_file, {'.dsc': dsc_file}, | ||
2956 | 545 | {'ogrecomponent': 'main'}) | ||
2957 | 546 | |||
2958 | 782 | # XXX 2010-10-06 Julian bug=655559 | 547 | # XXX 2010-10-06 Julian bug=655559 |
2959 | 783 | # This is failing on buildbot but not locally; it's trying to abort | 548 | # This is failing on buildbot but not locally; it's trying to abort |
2960 | 784 | # before the build has started. | 549 | # before the build has started. |
2961 | 785 | def disabled_test_abort(self): | 550 | def disabled_test_abort(self): |
2963 | 786 | slave = self.slave_helper.getClientSlave() | 551 | slave = self.getClientSlave() |
2964 | 787 | # We need to be in a BUILDING state before we can abort. | 552 | # We need to be in a BUILDING state before we can abort. |
2969 | 788 | d = self.slave_helper.triggerGoodBuild(slave) | 553 | self.triggerGoodBuild(slave) |
2970 | 789 | d.addCallback(lambda ignored: slave.abort()) | 554 | result = slave.abort() |
2971 | 790 | d.addCallback(self.assertEqual, BuilderStatus.ABORTING) | 555 | self.assertEqual(result, BuilderStatus.ABORTING) |
2968 | 791 | return d | ||
2972 | 792 | 556 | ||
2973 | 793 | def test_build(self): | 557 | def test_build(self): |
2974 | 794 | # Calling 'build' with an expected builder type, a good build id, | 558 | # Calling 'build' with an expected builder type, a good build id, |
2975 | 795 | # valid chroot & filemaps works and returns a BuilderStatus of | 559 | # valid chroot & filemaps works and returns a BuilderStatus of |
2976 | 796 | # BUILDING. | 560 | # BUILDING. |
2977 | 797 | build_id = 'some-id' | 561 | build_id = 'some-id' |
2982 | 798 | slave = self.slave_helper.getClientSlave() | 562 | slave = self.getClientSlave() |
2983 | 799 | d = self.slave_helper.triggerGoodBuild(slave, build_id) | 563 | result = self.triggerGoodBuild(slave, build_id) |
2984 | 800 | return d.addCallback( | 564 | self.assertEqual([BuilderStatus.BUILDING, build_id], result) |
2981 | 801 | self.assertEqual, [BuilderStatus.BUILDING, build_id]) | ||
2985 | 802 | 565 | ||
2986 | 803 | def test_clean(self): | 566 | def test_clean(self): |
2988 | 804 | slave = self.slave_helper.getClientSlave() | 567 | slave = self.getClientSlave() |
2989 | 805 | # XXX: JonathanLange 2010-09-21: Calling clean() on the slave requires | 568 | # XXX: JonathanLange 2010-09-21: Calling clean() on the slave requires |
2990 | 806 | # it to be in either the WAITING or ABORTED states, and both of these | 569 | # it to be in either the WAITING or ABORTED states, and both of these |
2991 | 807 | # states are very difficult to achieve in a test environment. For the | 570 | # states are very difficult to achieve in a test environment. For the |
2992 | @@ -811,248 +574,57 @@ | |||
2993 | 811 | def test_echo(self): | 574 | def test_echo(self): |
2994 | 812 | # Calling 'echo' contacts the server which returns the arguments we | 575 | # Calling 'echo' contacts the server which returns the arguments we |
2995 | 813 | # gave it. | 576 | # gave it. |
3000 | 814 | self.slave_helper.getServerSlave() | 577 | self.getServerSlave() |
3001 | 815 | slave = self.slave_helper.getClientSlave() | 578 | slave = self.getClientSlave() |
3002 | 816 | d = slave.echo('foo', 'bar', 42) | 579 | result = slave.echo('foo', 'bar', 42) |
3003 | 817 | return d.addCallback(self.assertEqual, ['foo', 'bar', 42]) | 580 | self.assertEqual(['foo', 'bar', 42], result) |
3004 | 818 | 581 | ||
3005 | 819 | def test_info(self): | 582 | def test_info(self): |
3006 | 820 | # Calling 'info' gets some information about the slave. | 583 | # Calling 'info' gets some information about the slave. |
3010 | 821 | self.slave_helper.getServerSlave() | 584 | self.getServerSlave() |
3011 | 822 | slave = self.slave_helper.getClientSlave() | 585 | slave = self.getClientSlave() |
3012 | 823 | d = slave.info() | 586 | result = slave.info() |
3013 | 824 | # We're testing the hard-coded values, since the version is hard-coded | 587 | # We're testing the hard-coded values, since the version is hard-coded |
3014 | 825 | # into the remote slave, the supported build managers are hard-coded | 588 | # into the remote slave, the supported build managers are hard-coded |
3015 | 826 | # into the tac file for the remote slave and config is returned from | 589 | # into the tac file for the remote slave and config is returned from |
3016 | 827 | # the configuration file. | 590 | # the configuration file. |
3019 | 828 | return d.addCallback( | 591 | self.assertEqual( |
3018 | 829 | self.assertEqual, | ||
3020 | 830 | ['1.0', | 592 | ['1.0', |
3021 | 831 | 'i386', | 593 | 'i386', |
3022 | 832 | ['sourcepackagerecipe', | 594 | ['sourcepackagerecipe', |
3024 | 833 | 'translation-templates', 'binarypackage', 'debian']]) | 595 | 'translation-templates', 'binarypackage', 'debian']], |
3025 | 596 | result) | ||
3026 | 834 | 597 | ||
3027 | 835 | def test_initial_status(self): | 598 | def test_initial_status(self): |
3028 | 836 | # Calling 'status' returns the current status of the slave. The | 599 | # Calling 'status' returns the current status of the slave. The |
3029 | 837 | # initial status is IDLE. | 600 | # initial status is IDLE. |
3034 | 838 | self.slave_helper.getServerSlave() | 601 | self.getServerSlave() |
3035 | 839 | slave = self.slave_helper.getClientSlave() | 602 | slave = self.getClientSlave() |
3036 | 840 | d = slave.status() | 603 | status = slave.status() |
3037 | 841 | return d.addCallback(self.assertEqual, [BuilderStatus.IDLE, '']) | 604 | self.assertEqual([BuilderStatus.IDLE, ''], status) |
3038 | 842 | 605 | ||
3039 | 843 | def test_status_after_build(self): | 606 | def test_status_after_build(self): |
3040 | 844 | # Calling 'status' returns the current status of the slave. After a | 607 | # Calling 'status' returns the current status of the slave. After a |
3041 | 845 | # build has been triggered, the status is BUILDING. | 608 | # build has been triggered, the status is BUILDING. |
3043 | 846 | slave = self.slave_helper.getClientSlave() | 609 | slave = self.getClientSlave() |
3044 | 847 | build_id = 'status-build-id' | 610 | build_id = 'status-build-id' |
3052 | 848 | d = self.slave_helper.triggerGoodBuild(slave, build_id) | 611 | self.triggerGoodBuild(slave, build_id) |
3053 | 849 | d.addCallback(lambda ignored: slave.status()) | 612 | status = slave.status() |
3054 | 850 | def check_status(status): | 613 | self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2]) |
3055 | 851 | self.assertEqual([BuilderStatus.BUILDING, build_id], status[:2]) | 614 | [log_file] = status[2:] |
3056 | 852 | [log_file] = status[2:] | 615 | self.assertIsInstance(log_file, xmlrpclib.Binary) |
3050 | 853 | self.assertIsInstance(log_file, xmlrpclib.Binary) | ||
3051 | 854 | return d.addCallback(check_status) | ||
3057 | 855 | 616 | ||
3058 | 856 | def test_ensurepresent_not_there(self): | 617 | def test_ensurepresent_not_there(self): |
3059 | 857 | # ensurepresent checks to see if a file is there. | 618 | # ensurepresent checks to see if a file is there. |
3065 | 858 | self.slave_helper.getServerSlave() | 619 | self.getServerSlave() |
3066 | 859 | slave = self.slave_helper.getClientSlave() | 620 | slave = self.getClientSlave() |
3067 | 860 | d = slave.ensurepresent('blahblah', None, None, None) | 621 | result = slave.ensurepresent('blahblah', None, None, None) |
3068 | 861 | d.addCallback(self.assertEqual, [False, 'No URL']) | 622 | self.assertEqual([False, 'No URL'], result) |
3064 | 862 | return d | ||
3069 | 863 | 623 | ||
3070 | 864 | def test_ensurepresent_actually_there(self): | 624 | def test_ensurepresent_actually_there(self): |
3071 | 865 | # ensurepresent checks to see if a file is there. | 625 | # ensurepresent checks to see if a file is there. |
3265 | 866 | tachandler = self.slave_helper.getServerSlave() | 626 | tachandler = self.getServerSlave() |
3266 | 867 | slave = self.slave_helper.getClientSlave() | 627 | slave = self.getClientSlave() |
3267 | 868 | self.slave_helper.makeCacheFile(tachandler, 'blahblah') | 628 | self.makeCacheFile(tachandler, 'blahblah') |
3268 | 869 | d = slave.ensurepresent('blahblah', None, None, None) | 629 | result = slave.ensurepresent('blahblah', None, None, None) |
3269 | 870 | d.addCallback(self.assertEqual, [True, 'No URL']) | 630 | self.assertEqual([True, 'No URL'], result) |
3077 | 871 | return d | ||
3078 | 872 | |||
3079 | 873 | def test_sendFileToSlave_not_there(self): | ||
3080 | 874 | self.slave_helper.getServerSlave() | ||
3081 | 875 | slave = self.slave_helper.getClientSlave() | ||
3082 | 876 | d = slave.sendFileToSlave('blahblah', None, None, None) | ||
3083 | 877 | return self.assertFailure(d, CannotFetchFile) | ||
3084 | 878 | |||
3085 | 879 | def test_sendFileToSlave_actually_there(self): | ||
3086 | 880 | tachandler = self.slave_helper.getServerSlave() | ||
3087 | 881 | slave = self.slave_helper.getClientSlave() | ||
3088 | 882 | self.slave_helper.makeCacheFile(tachandler, 'blahblah') | ||
3089 | 883 | d = slave.sendFileToSlave('blahblah', None, None, None) | ||
3090 | 884 | def check_present(ignored): | ||
3091 | 885 | d = slave.ensurepresent('blahblah', None, None, None) | ||
3092 | 886 | return d.addCallback(self.assertEqual, [True, 'No URL']) | ||
3093 | 887 | d.addCallback(check_present) | ||
3094 | 888 | return d | ||
3095 | 889 | |||
3096 | 890 | def test_resumeHost_success(self): | ||
3097 | 891 | # On a successful resume resume() fires the returned deferred | ||
3098 | 892 | # callback with 'None'. | ||
3099 | 893 | self.slave_helper.getServerSlave() | ||
3100 | 894 | slave = self.slave_helper.getClientSlave() | ||
3101 | 895 | |||
3102 | 896 | # The configuration testing command-line. | ||
3103 | 897 | self.assertEqual( | ||
3104 | 898 | 'echo %(vm_host)s', config.builddmaster.vm_resume_command) | ||
3105 | 899 | |||
3106 | 900 | # On success the response is None. | ||
3107 | 901 | def check_resume_success(response): | ||
3108 | 902 | out, err, code = response | ||
3109 | 903 | self.assertEqual(os.EX_OK, code) | ||
3110 | 904 | # XXX: JonathanLange 2010-09-23: We should instead pass the | ||
3111 | 905 | # expected vm_host into the client slave. Not doing this now, | ||
3112 | 906 | # since the SlaveHelper is being moved around. | ||
3113 | 907 | self.assertEqual("%s\n" % slave._vm_host, out) | ||
3114 | 908 | d = slave.resume() | ||
3115 | 909 | d.addBoth(check_resume_success) | ||
3116 | 910 | return d | ||
3117 | 911 | |||
3118 | 912 | def test_resumeHost_failure(self): | ||
3119 | 913 | # On a failed resume, 'resumeHost' fires the returned deferred | ||
3120 | 914 | # errorback with the `ProcessTerminated` failure. | ||
3121 | 915 | self.slave_helper.getServerSlave() | ||
3122 | 916 | slave = self.slave_helper.getClientSlave() | ||
3123 | 917 | |||
3124 | 918 | # Override the configuration command-line with one that will fail. | ||
3125 | 919 | failed_config = """ | ||
3126 | 920 | [builddmaster] | ||
3127 | 921 | vm_resume_command: test "%(vm_host)s = 'no-sir'" | ||
3128 | 922 | """ | ||
3129 | 923 | config.push('failed_resume_command', failed_config) | ||
3130 | 924 | self.addCleanup(config.pop, 'failed_resume_command') | ||
3131 | 925 | |||
3132 | 926 | # On failures, the response is a twisted `Failure` object containing | ||
3133 | 927 | # a tuple. | ||
3134 | 928 | def check_resume_failure(failure): | ||
3135 | 929 | out, err, code = failure.value | ||
3136 | 930 | # The process will exit with a return code of "1". | ||
3137 | 931 | self.assertEqual(code, 1) | ||
3138 | 932 | d = slave.resume() | ||
3139 | 933 | d.addBoth(check_resume_failure) | ||
3140 | 934 | return d | ||
3141 | 935 | |||
3142 | 936 | def test_resumeHost_timeout(self): | ||
3143 | 937 | # On a resume timeouts, 'resumeHost' fires the returned deferred | ||
3144 | 938 | # errorback with the `TimeoutError` failure. | ||
3145 | 939 | self.slave_helper.getServerSlave() | ||
3146 | 940 | slave = self.slave_helper.getClientSlave() | ||
3147 | 941 | |||
3148 | 942 | # Override the configuration command-line with one that will timeout. | ||
3149 | 943 | timeout_config = """ | ||
3150 | 944 | [builddmaster] | ||
3151 | 945 | vm_resume_command: sleep 5 | ||
3152 | 946 | socket_timeout: 1 | ||
3153 | 947 | """ | ||
3154 | 948 | config.push('timeout_resume_command', timeout_config) | ||
3155 | 949 | self.addCleanup(config.pop, 'timeout_resume_command') | ||
3156 | 950 | |||
3157 | 951 | # On timeouts, the response is a twisted `Failure` object containing | ||
3158 | 952 | # a `TimeoutError` error. | ||
3159 | 953 | def check_resume_timeout(failure): | ||
3160 | 954 | self.assertIsInstance(failure, Failure) | ||
3161 | 955 | out, err, code = failure.value | ||
3162 | 956 | self.assertEqual(code, signal.SIGKILL) | ||
3163 | 957 | clock = Clock() | ||
3164 | 958 | d = slave.resume(clock=clock) | ||
3165 | 959 | # Move the clock beyond the socket_timeout but earlier than the | ||
3166 | 960 | # sleep 5. This stops the test having to wait for the timeout. | ||
3167 | 961 | # Fast tests FTW! | ||
3168 | 962 | clock.advance(2) | ||
3169 | 963 | d.addBoth(check_resume_timeout) | ||
3170 | 964 | return d | ||
3171 | 965 | |||
3172 | 966 | |||
3173 | 967 | class TestSlaveTimeouts(TrialTestCase): | ||
3174 | 968 | # Testing that the methods that call callRemote() all time out | ||
3175 | 969 | # as required. | ||
3176 | 970 | |||
3177 | 971 | layer = TwistedLayer | ||
3178 | 972 | |||
3179 | 973 | def setUp(self): | ||
3180 | 974 | super(TestSlaveTimeouts, self).setUp() | ||
3181 | 975 | self.slave_helper = SlaveTestHelpers() | ||
3182 | 976 | self.slave_helper.setUp() | ||
3183 | 977 | self.addCleanup(self.slave_helper.cleanUp) | ||
3184 | 978 | self.clock = Clock() | ||
3185 | 979 | self.proxy = DeadProxy("url") | ||
3186 | 980 | self.slave = self.slave_helper.getClientSlave( | ||
3187 | 981 | reactor=self.clock, proxy=self.proxy) | ||
3188 | 982 | |||
3189 | 983 | def assertCancelled(self, d): | ||
3190 | 984 | self.clock.advance(config.builddmaster.socket_timeout + 1) | ||
3191 | 985 | return self.assertFailure(d, CancelledError) | ||
3192 | 986 | |||
3193 | 987 | def test_timeout_abort(self): | ||
3194 | 988 | return self.assertCancelled(self.slave.abort()) | ||
3195 | 989 | |||
3196 | 990 | def test_timeout_clean(self): | ||
3197 | 991 | return self.assertCancelled(self.slave.clean()) | ||
3198 | 992 | |||
3199 | 993 | def test_timeout_echo(self): | ||
3200 | 994 | return self.assertCancelled(self.slave.echo()) | ||
3201 | 995 | |||
3202 | 996 | def test_timeout_info(self): | ||
3203 | 997 | return self.assertCancelled(self.slave.info()) | ||
3204 | 998 | |||
3205 | 999 | def test_timeout_status(self): | ||
3206 | 1000 | return self.assertCancelled(self.slave.status()) | ||
3207 | 1001 | |||
3208 | 1002 | def test_timeout_ensurepresent(self): | ||
3209 | 1003 | return self.assertCancelled( | ||
3210 | 1004 | self.slave.ensurepresent(None, None, None, None)) | ||
3211 | 1005 | |||
3212 | 1006 | def test_timeout_build(self): | ||
3213 | 1007 | return self.assertCancelled( | ||
3214 | 1008 | self.slave.build(None, None, None, None, None)) | ||
3215 | 1009 | |||
3216 | 1010 | |||
3217 | 1011 | class TestSlaveWithLibrarian(TrialTestCase): | ||
3218 | 1012 | """Tests that need more of Launchpad to run.""" | ||
3219 | 1013 | |||
3220 | 1014 | layer = TwistedLaunchpadZopelessLayer | ||
3221 | 1015 | |||
3222 | 1016 | def setUp(self): | ||
3223 | 1017 | super(TestSlaveWithLibrarian, self) | ||
3224 | 1018 | self.slave_helper = SlaveTestHelpers() | ||
3225 | 1019 | self.slave_helper.setUp() | ||
3226 | 1020 | self.addCleanup(self.slave_helper.cleanUp) | ||
3227 | 1021 | self.factory = LaunchpadObjectFactory() | ||
3228 | 1022 | login_as(ANONYMOUS) | ||
3229 | 1023 | self.addCleanup(logout) | ||
3230 | 1024 | |||
3231 | 1025 | def test_ensurepresent_librarian(self): | ||
3232 | 1026 | # ensurepresent, when given an http URL for a file will download the | ||
3233 | 1027 | # file from that URL and report that the file is present, and it was | ||
3234 | 1028 | # downloaded. | ||
3235 | 1029 | |||
3236 | 1030 | # Use the Librarian because it's a "convenient" web server. | ||
3237 | 1031 | lf = self.factory.makeLibraryFileAlias( | ||
3238 | 1032 | 'HelloWorld.txt', content="Hello World") | ||
3239 | 1033 | self.layer.txn.commit() | ||
3240 | 1034 | self.slave_helper.getServerSlave() | ||
3241 | 1035 | slave = self.slave_helper.getClientSlave() | ||
3242 | 1036 | d = slave.ensurepresent( | ||
3243 | 1037 | lf.content.sha1, lf.http_url, "", "") | ||
3244 | 1038 | d.addCallback(self.assertEqual, [True, 'Download']) | ||
3245 | 1039 | return d | ||
3246 | 1040 | |||
3247 | 1041 | def test_retrieve_files_from_filecache(self): | ||
3248 | 1042 | # Files that are present on the slave can be downloaded with a | ||
3249 | 1043 | # filename made from the sha1 of the content underneath the | ||
3250 | 1044 | # 'filecache' directory. | ||
3251 | 1045 | content = "Hello World" | ||
3252 | 1046 | lf = self.factory.makeLibraryFileAlias( | ||
3253 | 1047 | 'HelloWorld.txt', content=content) | ||
3254 | 1048 | self.layer.txn.commit() | ||
3255 | 1049 | expected_url = '%s/filecache/%s' % ( | ||
3256 | 1050 | self.slave_helper.BASE_URL, lf.content.sha1) | ||
3257 | 1051 | self.slave_helper.getServerSlave() | ||
3258 | 1052 | slave = self.slave_helper.getClientSlave() | ||
3259 | 1053 | d = slave.ensurepresent( | ||
3260 | 1054 | lf.content.sha1, lf.http_url, "", "") | ||
3261 | 1055 | def check_file(ignored): | ||
3262 | 1056 | d = getPage(expected_url.encode('utf8')) | ||
3263 | 1057 | return d.addCallback(self.assertEqual, content) | ||
3264 | 1058 | return d.addCallback(check_file) | ||
3270 | 1059 | 631 | ||
3271 | === modified file 'lib/lp/buildmaster/tests/test_manager.py' | |||
3272 | --- lib/lp/buildmaster/tests/test_manager.py 2010-10-19 13:58:21 +0000 | |||
3273 | +++ lib/lp/buildmaster/tests/test_manager.py 2010-12-07 16:24:04 +0000 | |||
3274 | @@ -6,7 +6,6 @@ | |||
3275 | 6 | import os | 6 | import os |
3276 | 7 | import signal | 7 | import signal |
3277 | 8 | import time | 8 | import time |
3278 | 9 | import xmlrpclib | ||
3279 | 10 | 9 | ||
3280 | 11 | import transaction | 10 | import transaction |
3281 | 12 | 11 | ||
3282 | @@ -15,7 +14,9 @@ | |||
3283 | 15 | reactor, | 14 | reactor, |
3284 | 16 | task, | 15 | task, |
3285 | 17 | ) | 16 | ) |
3286 | 17 | from twisted.internet.error import ConnectionClosed | ||
3287 | 18 | from twisted.internet.task import ( | 18 | from twisted.internet.task import ( |
3288 | 19 | Clock, | ||
3289 | 19 | deferLater, | 20 | deferLater, |
3290 | 20 | ) | 21 | ) |
3291 | 21 | from twisted.python.failure import Failure | 22 | from twisted.python.failure import Failure |
3292 | @@ -29,45 +30,577 @@ | |||
3293 | 29 | ANONYMOUS, | 30 | ANONYMOUS, |
3294 | 30 | login, | 31 | login, |
3295 | 31 | ) | 32 | ) |
3299 | 32 | from canonical.launchpad.scripts.logger import ( | 33 | from canonical.launchpad.scripts.logger import BufferLogger |
3297 | 33 | QuietFakeLogger, | ||
3298 | 34 | ) | ||
3300 | 35 | from canonical.testing.layers import ( | 34 | from canonical.testing.layers import ( |
3301 | 36 | LaunchpadScriptLayer, | 35 | LaunchpadScriptLayer, |
3303 | 37 | TwistedLaunchpadZopelessLayer, | 36 | LaunchpadZopelessLayer, |
3304 | 38 | TwistedLayer, | 37 | TwistedLayer, |
3305 | 39 | ZopelessDatabaseLayer, | ||
3306 | 40 | ) | 38 | ) |
3307 | 41 | from lp.buildmaster.enums import BuildStatus | 39 | from lp.buildmaster.enums import BuildStatus |
3308 | 42 | from lp.buildmaster.interfaces.builder import IBuilderSet | 40 | from lp.buildmaster.interfaces.builder import IBuilderSet |
3309 | 43 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet | 41 | from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet |
3310 | 44 | from lp.buildmaster.manager import ( | 42 | from lp.buildmaster.manager import ( |
3312 | 45 | assessFailureCounts, | 43 | BaseDispatchResult, |
3313 | 44 | buildd_success_result_map, | ||
3314 | 46 | BuilddManager, | 45 | BuilddManager, |
3315 | 46 | FailDispatchResult, | ||
3316 | 47 | NewBuildersScanner, | 47 | NewBuildersScanner, |
3317 | 48 | RecordingSlave, | ||
3318 | 49 | ResetDispatchResult, | ||
3319 | 48 | SlaveScanner, | 50 | SlaveScanner, |
3320 | 49 | ) | 51 | ) |
3321 | 50 | from lp.buildmaster.model.builder import Builder | ||
3322 | 51 | from lp.buildmaster.tests.harness import BuilddManagerTestSetup | 52 | from lp.buildmaster.tests.harness import BuilddManagerTestSetup |
3328 | 52 | from lp.buildmaster.tests.mock_slaves import ( | 53 | from lp.buildmaster.tests.mock_slaves import BuildingSlave |
3324 | 53 | BrokenSlave, | ||
3325 | 54 | BuildingSlave, | ||
3326 | 55 | OkSlave, | ||
3327 | 56 | ) | ||
3329 | 57 | from lp.registry.interfaces.distribution import IDistributionSet | 54 | from lp.registry.interfaces.distribution import IDistributionSet |
3330 | 58 | from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet | 55 | from lp.soyuz.interfaces.binarypackagebuild import IBinaryPackageBuildSet |
3332 | 59 | from lp.testing import TestCaseWithFactory | 56 | from lp.soyuz.tests.test_publishing import SoyuzTestPublisher |
3333 | 57 | from lp.testing import TestCase as LaunchpadTestCase | ||
3334 | 60 | from lp.testing.factory import LaunchpadObjectFactory | 58 | from lp.testing.factory import LaunchpadObjectFactory |
3335 | 61 | from lp.testing.fakemethod import FakeMethod | 59 | from lp.testing.fakemethod import FakeMethod |
3336 | 62 | from lp.testing.sampledata import BOB_THE_BUILDER_NAME | 60 | from lp.testing.sampledata import BOB_THE_BUILDER_NAME |
3337 | 63 | 61 | ||
3338 | 64 | 62 | ||
3339 | 63 | class TestRecordingSlaves(TrialTestCase): | ||
3340 | 64 | """Tests for the recording slave class.""" | ||
3341 | 65 | layer = TwistedLayer | ||
3342 | 66 | |||
3343 | 67 | def setUp(self): | ||
3344 | 68 | """Setup a fresh `RecordingSlave` for tests.""" | ||
3345 | 69 | TrialTestCase.setUp(self) | ||
3346 | 70 | self.slave = RecordingSlave( | ||
3347 | 71 | 'foo', 'http://foo:8221/rpc', 'foo.host') | ||
3348 | 72 | |||
3349 | 73 | def test_representation(self): | ||
3350 | 74 | """`RecordingSlave` has a custom representation. | ||
3351 | 75 | |||
3352 | 76 | It encloses builder name and xmlrpc url for debug purposes. | ||
3353 | 77 | """ | ||
3354 | 78 | self.assertEqual('<foo:http://foo:8221/rpc>', repr(self.slave)) | ||
3355 | 79 | |||
3356 | 80 | def assert_ensurepresent(self, func): | ||
3357 | 81 | """Helper function to test results from calling ensurepresent.""" | ||
3358 | 82 | self.assertEqual( | ||
3359 | 83 | [True, 'Download'], | ||
3360 | 84 | func('boing', 'bar', 'baz')) | ||
3361 | 85 | self.assertEqual( | ||
3362 | 86 | [('ensurepresent', ('boing', 'bar', 'baz'))], | ||
3363 | 87 | self.slave.calls) | ||
3364 | 88 | |||
3365 | 89 | def test_ensurepresent(self): | ||
3366 | 90 | """`RecordingSlave.ensurepresent` always succeeds. | ||
3367 | 91 | |||
3368 | 92 | It returns the expected succeed code and records the interaction | ||
3369 | 93 | information for later use. | ||
3370 | 94 | """ | ||
3371 | 95 | self.assert_ensurepresent(self.slave.ensurepresent) | ||
3372 | 96 | |||
3373 | 97 | def test_sendFileToSlave(self): | ||
3374 | 98 | """RecordingSlave.sendFileToSlave always succeeeds. | ||
3375 | 99 | |||
3376 | 100 | It calls ensurepresent() and hence returns the same results. | ||
3377 | 101 | """ | ||
3378 | 102 | self.assert_ensurepresent(self.slave.sendFileToSlave) | ||
3379 | 103 | |||
3380 | 104 | def test_build(self): | ||
3381 | 105 | """`RecordingSlave.build` always succeeds. | ||
3382 | 106 | |||
3383 | 107 | It returns the expected succeed code and records the interaction | ||
3384 | 108 | information for later use. | ||
3385 | 109 | """ | ||
3386 | 110 | self.assertEqual( | ||
3387 | 111 | ['BuilderStatus.BUILDING', 'boing'], | ||
3388 | 112 | self.slave.build('boing', 'bar', 'baz')) | ||
3389 | 113 | self.assertEqual( | ||
3390 | 114 | [('build', ('boing', 'bar', 'baz'))], | ||
3391 | 115 | self.slave.calls) | ||
3392 | 116 | |||
3393 | 117 | def test_resume(self): | ||
3394 | 118 | """`RecordingSlave.resume` always returns successs.""" | ||
3395 | 119 | # Resume isn't requested in a just-instantiated RecordingSlave. | ||
3396 | 120 | self.assertFalse(self.slave.resume_requested) | ||
3397 | 121 | |||
3398 | 122 | # When resume is called, it returns the success list and mark | ||
3399 | 123 | # the slave for resuming. | ||
3400 | 124 | self.assertEqual(['', '', os.EX_OK], self.slave.resume()) | ||
3401 | 125 | self.assertTrue(self.slave.resume_requested) | ||
3402 | 126 | |||
3403 | 127 | def test_resumeHost_success(self): | ||
3404 | 128 | # On a successful resume resumeHost() fires the returned deferred | ||
3405 | 129 | # callback with 'None'. | ||
3406 | 130 | |||
3407 | 131 | # The configuration testing command-line. | ||
3408 | 132 | self.assertEqual( | ||
3409 | 133 | 'echo %(vm_host)s', config.builddmaster.vm_resume_command) | ||
3410 | 134 | |||
3411 | 135 | # On success the response is None. | ||
3412 | 136 | def check_resume_success(response): | ||
3413 | 137 | out, err, code = response | ||
3414 | 138 | self.assertEqual(os.EX_OK, code) | ||
3415 | 139 | self.assertEqual("%s\n" % self.slave.vm_host, out) | ||
3416 | 140 | d = self.slave.resumeSlave() | ||
3417 | 141 | d.addBoth(check_resume_success) | ||
3418 | 142 | return d | ||
3419 | 143 | |||
3420 | 144 | def test_resumeHost_failure(self): | ||
3421 | 145 | # On a failed resume, 'resumeHost' fires the returned deferred | ||
3422 | 146 | # errorback with the `ProcessTerminated` failure. | ||
3423 | 147 | |||
3424 | 148 | # Override the configuration command-line with one that will fail. | ||
3425 | 149 | failed_config = """ | ||
3426 | 150 | [builddmaster] | ||
3427 | 151 | vm_resume_command: test "%(vm_host)s = 'no-sir'" | ||
3428 | 152 | """ | ||
3429 | 153 | config.push('failed_resume_command', failed_config) | ||
3430 | 154 | self.addCleanup(config.pop, 'failed_resume_command') | ||
3431 | 155 | |||
3432 | 156 | # On failures, the response is a twisted `Failure` object containing | ||
3433 | 157 | # a tuple. | ||
3434 | 158 | def check_resume_failure(failure): | ||
3435 | 159 | out, err, code = failure.value | ||
3436 | 160 | # The process will exit with a return code of "1". | ||
3437 | 161 | self.assertEqual(code, 1) | ||
3438 | 162 | d = self.slave.resumeSlave() | ||
3439 | 163 | d.addBoth(check_resume_failure) | ||
3440 | 164 | return d | ||
3441 | 165 | |||
3442 | 166 | def test_resumeHost_timeout(self): | ||
3443 | 167 | # On a resume timeouts, 'resumeHost' fires the returned deferred | ||
3444 | 168 | # errorback with the `TimeoutError` failure. | ||
3445 | 169 | |||
3446 | 170 | # Override the configuration command-line with one that will timeout. | ||
3447 | 171 | timeout_config = """ | ||
3448 | 172 | [builddmaster] | ||
3449 | 173 | vm_resume_command: sleep 5 | ||
3450 | 174 | socket_timeout: 1 | ||
3451 | 175 | """ | ||
3452 | 176 | config.push('timeout_resume_command', timeout_config) | ||
3453 | 177 | self.addCleanup(config.pop, 'timeout_resume_command') | ||
3454 | 178 | |||
3455 | 179 | # On timeouts, the response is a twisted `Failure` object containing | ||
3456 | 180 | # a `TimeoutError` error. | ||
3457 | 181 | def check_resume_timeout(failure): | ||
3458 | 182 | self.assertIsInstance(failure, Failure) | ||
3459 | 183 | out, err, code = failure.value | ||
3460 | 184 | self.assertEqual(code, signal.SIGKILL) | ||
3461 | 185 | clock = Clock() | ||
3462 | 186 | d = self.slave.resumeSlave(clock=clock) | ||
3463 | 187 | # Move the clock beyond the socket_timeout but earlier than the | ||
3464 | 188 | # sleep 5. This stops the test having to wait for the timeout. | ||
3465 | 189 | # Fast tests FTW! | ||
3466 | 190 | clock.advance(2) | ||
3467 | 191 | d.addBoth(check_resume_timeout) | ||
3468 | 192 | return d | ||
3469 | 193 | |||
3470 | 194 | |||
3471 | 195 | class TestingXMLRPCProxy: | ||
3472 | 196 | """This class mimics a twisted XMLRPC Proxy class.""" | ||
3473 | 197 | |||
3474 | 198 | def __init__(self, failure_info=None): | ||
3475 | 199 | self.calls = [] | ||
3476 | 200 | self.failure_info = failure_info | ||
3477 | 201 | self.works = failure_info is None | ||
3478 | 202 | |||
3479 | 203 | def callRemote(self, *args): | ||
3480 | 204 | self.calls.append(args) | ||
3481 | 205 | if self.works: | ||
3482 | 206 | result = buildd_success_result_map.get(args[0]) | ||
3483 | 207 | else: | ||
3484 | 208 | result = 'boing' | ||
3485 | 209 | return defer.succeed([result, self.failure_info]) | ||
3486 | 210 | |||
3487 | 211 | |||
3488 | 212 | class TestingResetDispatchResult(ResetDispatchResult): | ||
3489 | 213 | """Override the evaluation method to simply annotate the call.""" | ||
3490 | 214 | |||
3491 | 215 | def __init__(self, slave, info=None): | ||
3492 | 216 | ResetDispatchResult.__init__(self, slave, info) | ||
3493 | 217 | self.processed = False | ||
3494 | 218 | |||
3495 | 219 | def __call__(self): | ||
3496 | 220 | self.processed = True | ||
3497 | 221 | |||
3498 | 222 | |||
3499 | 223 | class TestingFailDispatchResult(FailDispatchResult): | ||
3500 | 224 | """Override the evaluation method to simply annotate the call.""" | ||
3501 | 225 | |||
3502 | 226 | def __init__(self, slave, info=None): | ||
3503 | 227 | FailDispatchResult.__init__(self, slave, info) | ||
3504 | 228 | self.processed = False | ||
3505 | 229 | |||
3506 | 230 | def __call__(self): | ||
3507 | 231 | self.processed = True | ||
3508 | 232 | |||
3509 | 233 | |||
3510 | 234 | class TestingSlaveScanner(SlaveScanner): | ||
3511 | 235 | """Override the dispatch result factories """ | ||
3512 | 236 | |||
3513 | 237 | reset_result = TestingResetDispatchResult | ||
3514 | 238 | fail_result = TestingFailDispatchResult | ||
3515 | 239 | |||
3516 | 240 | |||
3517 | 241 | class TestSlaveScanner(TrialTestCase): | ||
3518 | 242 | """Tests for the actual build slave manager.""" | ||
3519 | 243 | layer = LaunchpadZopelessLayer | ||
3520 | 244 | |||
3521 | 245 | def setUp(self): | ||
3522 | 246 | TrialTestCase.setUp(self) | ||
3523 | 247 | self.manager = TestingSlaveScanner( | ||
3524 | 248 | BOB_THE_BUILDER_NAME, BufferLogger()) | ||
3525 | 249 | |||
3526 | 250 | self.fake_builder_url = 'http://bob.buildd:8221/' | ||
3527 | 251 | self.fake_builder_host = 'bob.host' | ||
3528 | 252 | |||
3529 | 253 | # We will use an instrumented SlaveScanner instance for tests in | ||
3530 | 254 | # this context. | ||
3531 | 255 | |||
3532 | 256 | # Stop cyclic execution and record the end of the cycle. | ||
3533 | 257 | self.stopped = False | ||
3534 | 258 | |||
3535 | 259 | def testNextCycle(): | ||
3536 | 260 | self.stopped = True | ||
3537 | 261 | |||
3538 | 262 | self.manager.scheduleNextScanCycle = testNextCycle | ||
3539 | 263 | |||
3540 | 264 | # Return the testing Proxy version. | ||
3541 | 265 | self.test_proxy = TestingXMLRPCProxy() | ||
3542 | 266 | |||
3543 | 267 | def testGetProxyForSlave(slave): | ||
3544 | 268 | return self.test_proxy | ||
3545 | 269 | self.manager._getProxyForSlave = testGetProxyForSlave | ||
3546 | 270 | |||
3547 | 271 | # Deactivate the 'scan' method. | ||
3548 | 272 | def testScan(): | ||
3549 | 273 | pass | ||
3550 | 274 | self.manager.scan = testScan | ||
3551 | 275 | |||
3552 | 276 | # Stop automatic collection of dispatching results. | ||
3553 | 277 | def testslaveConversationEnded(): | ||
3554 | 278 | pass | ||
3555 | 279 | self._realslaveConversationEnded = self.manager.slaveConversationEnded | ||
3556 | 280 | self.manager.slaveConversationEnded = testslaveConversationEnded | ||
3557 | 281 | |||
3558 | 282 | def assertIsDispatchReset(self, result): | ||
3559 | 283 | self.assertTrue( | ||
3560 | 284 | isinstance(result, TestingResetDispatchResult), | ||
3561 | 285 | 'Dispatch failure did not result in a ResetBuildResult object') | ||
3562 | 286 | |||
3563 | 287 | def assertIsDispatchFail(self, result): | ||
3564 | 288 | self.assertTrue( | ||
3565 | 289 | isinstance(result, TestingFailDispatchResult), | ||
3566 | 290 | 'Dispatch failure did not result in a FailBuildResult object') | ||
3567 | 291 | |||
3568 | 292 | def test_checkResume(self): | ||
3569 | 293 | """`SlaveScanner.checkResume` is chained after resume requests. | ||
3570 | 294 | |||
3571 | 295 | If the resume request succeed it returns None, otherwise it returns | ||
3572 | 296 | a `ResetBuildResult` (the one in the test context) that will be | ||
3573 | 297 | collect and evaluated later. | ||
3574 | 298 | |||
3575 | 299 | See `RecordingSlave.resumeHost` for more information about the resume | ||
3576 | 300 | result contents. | ||
3577 | 301 | """ | ||
3578 | 302 | slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host') | ||
3579 | 303 | |||
3580 | 304 | successful_response = ['', '', os.EX_OK] | ||
3581 | 305 | result = self.manager.checkResume(successful_response, slave) | ||
3582 | 306 | self.assertEqual( | ||
3583 | 307 | None, result, 'Successful resume checks should return None') | ||
3584 | 308 | |||
3585 | 309 | failed_response = ['stdout', 'stderr', 1] | ||
3586 | 310 | result = self.manager.checkResume(failed_response, slave) | ||
3587 | 311 | self.assertIsDispatchReset(result) | ||
3588 | 312 | self.assertEqual( | ||
3589 | 313 | '<foo:http://foo.buildd:8221/> reset failure', repr(result)) | ||
3590 | 314 | self.assertEqual( | ||
3591 | 315 | result.info, "stdout\nstderr") | ||
3592 | 316 | |||
3593 | 317 | def test_fail_to_resume_slave_resets_slave(self): | ||
3594 | 318 | # If an attempt to resume and dispatch a slave fails, we reset the | ||
3595 | 319 | # slave by calling self.reset_result(slave)(). | ||
3596 | 320 | |||
3597 | 321 | reset_result_calls = [] | ||
3598 | 322 | |||
3599 | 323 | class LoggingResetResult(BaseDispatchResult): | ||
3600 | 324 | """A DispatchResult that logs calls to itself. | ||
3601 | 325 | |||
3602 | 326 | This *must* subclass BaseDispatchResult, otherwise finishCycle() | ||
3603 | 327 | won't treat it like a dispatch result. | ||
3604 | 328 | """ | ||
3605 | 329 | |||
3606 | 330 | def __init__(self, slave, info=None): | ||
3607 | 331 | self.slave = slave | ||
3608 | 332 | |||
3609 | 333 | def __call__(self): | ||
3610 | 334 | reset_result_calls.append(self.slave) | ||
3611 | 335 | |||
3612 | 336 | # Make a failing slave that is requesting a resume. | ||
3613 | 337 | slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host') | ||
3614 | 338 | slave.resume_requested = True | ||
3615 | 339 | slave.resumeSlave = lambda: deferLater( | ||
3616 | 340 | reactor, 0, defer.fail, Failure(('out', 'err', 1))) | ||
3617 | 341 | |||
3618 | 342 | # Make the manager log the reset result calls. | ||
3619 | 343 | self.manager.reset_result = LoggingResetResult | ||
3620 | 344 | |||
3621 | 345 | # We only care about this one slave. Reset the list of manager | ||
3622 | 346 | # deferreds in case setUp did something unexpected. | ||
3623 | 347 | self.manager._deferred_list = [] | ||
3624 | 348 | |||
3625 | 349 | # Here, we're patching the slaveConversationEnded method so we can | ||
3626 | 350 | # get an extra callback at the end of it, so we can | ||
3627 | 351 | # verify that the reset_result was really called. | ||
3628 | 352 | def _slaveConversationEnded(): | ||
3629 | 353 | d = self._realslaveConversationEnded() | ||
3630 | 354 | return d.addCallback( | ||
3631 | 355 | lambda ignored: self.assertEqual([slave], reset_result_calls)) | ||
3632 | 356 | self.manager.slaveConversationEnded = _slaveConversationEnded | ||
3633 | 357 | |||
3634 | 358 | self.manager.resumeAndDispatch(slave) | ||
3635 | 359 | |||
3636 | 360 | def test_failed_to_resume_slave_ready_for_reset(self): | ||
3637 | 361 | # When a slave fails to resume, the manager has a Deferred in its | ||
3638 | 362 | # Deferred list that is ready to fire with a ResetDispatchResult. | ||
3639 | 363 | |||
3640 | 364 | # Make a failing slave that is requesting a resume. | ||
3641 | 365 | slave = RecordingSlave('foo', 'http://foo.buildd:8221/', 'foo.host') | ||
3642 | 366 | slave.resume_requested = True | ||
3643 | 367 | slave.resumeSlave = lambda: defer.fail(Failure(('out', 'err', 1))) | ||
3644 | 368 | |||
3645 | 369 | # We only care about this one slave. Reset the list of manager | ||
3646 | 370 | # deferreds in case setUp did something unexpected. | ||
3647 | 371 | self.manager._deferred_list = [] | ||
3648 | 372 | # Restore the slaveConversationEnded method. It's very relevant to | ||
3649 | 373 | # this test. | ||
3650 | 374 | self.manager.slaveConversationEnded = self._realslaveConversationEnded | ||
3651 | 375 | self.manager.resumeAndDispatch(slave) | ||
3652 | 376 | [d] = self.manager._deferred_list | ||
3653 | 377 | |||
3654 | 378 | # The Deferred for our failing slave should be ready to fire | ||
3655 | 379 | # successfully with a ResetDispatchResult. | ||
3656 | 380 | def check_result(result): | ||
3657 | 381 | self.assertIsInstance(result, ResetDispatchResult) | ||
3658 | 382 | self.assertEqual(slave, result.slave) | ||
3659 | 383 | self.assertFalse(result.processed) | ||
3660 | 384 | return d.addCallback(check_result) | ||
3661 | 385 | |||
3662 | 386 | def _setUpSlaveAndBuilder(self, builder_failure_count=None, | ||
3663 | 387 | job_failure_count=None): | ||
3664 | 388 | # Helper function to set up a builder and its recording slave. | ||
3665 | 389 | if builder_failure_count is None: | ||
3666 | 390 | builder_failure_count = 0 | ||
3667 | 391 | if job_failure_count is None: | ||
3668 | 392 | job_failure_count = 0 | ||
3669 | 393 | slave = RecordingSlave( | ||
3670 | 394 | BOB_THE_BUILDER_NAME, self.fake_builder_url, | ||
3671 | 395 | self.fake_builder_host) | ||
3672 | 396 | bob_builder = getUtility(IBuilderSet)[slave.name] | ||
3673 | 397 | bob_builder.failure_count = builder_failure_count | ||
3674 | 398 | bob_builder.getCurrentBuildFarmJob().failure_count = job_failure_count | ||
3675 | 399 | return slave, bob_builder | ||
3676 | 400 | |||
3677 | 401 | def test_checkDispatch_success(self): | ||
3678 | 402 | # SlaveScanner.checkDispatch returns None for a successful | ||
3679 | 403 | # dispatch. | ||
3680 | 404 | |||
3681 | 405 | """ | ||
3682 | 406 | If the dispatch request fails or a unknown method is given, it | ||
3683 | 407 | returns a `FailDispatchResult` (in the test context) that will | ||
3684 | 408 | be evaluated later. | ||
3685 | 409 | |||
3686 | 410 | Builders will be marked as failed if the following responses | ||
3687 | 411 | categories are received. | ||
3688 | 412 | |||
3689 | 413 | * Legitimate slave failures: when the response is a list with 2 | ||
3690 | 414 | elements but the first element ('status') does not correspond to | ||
3691 | 415 | the expected 'success' result. See `buildd_success_result_map`. | ||
3692 | 416 | |||
3693 | 417 | * Unexpected (code) failures: when the given 'method' is unknown | ||
3694 | 418 | or the response isn't a 2-element list or Failure instance. | ||
3695 | 419 | |||
3696 | 420 | Communication failures (a twisted `Failure` instance) will simply | ||
3697 | 421 | cause the builder to be reset, a `ResetDispatchResult` object is | ||
3698 | 422 | returned. In other words, network failures are ignored in this | ||
3699 | 423 | stage, broken builders will be identified and marked as so | ||
3700 | 424 | during 'scan()' stage. | ||
3701 | 425 | |||
3702 | 426 | On success dispatching it returns None. | ||
3703 | 427 | """ | ||
3704 | 428 | slave, bob_builder = self._setUpSlaveAndBuilder( | ||
3705 | 429 | builder_failure_count=0, job_failure_count=0) | ||
3706 | 430 | |||
3707 | 431 | # Successful legitimate response, None is returned. | ||
3708 | 432 | successful_response = [ | ||
3709 | 433 | buildd_success_result_map.get('ensurepresent'), 'cool builder'] | ||
3710 | 434 | result = self.manager.checkDispatch( | ||
3711 | 435 | successful_response, 'ensurepresent', slave) | ||
3712 | 436 | self.assertEqual( | ||
3713 | 437 | None, result, 'Successful dispatch checks should return None') | ||
3714 | 438 | |||
3715 | 439 | def test_checkDispatch_first_fail(self): | ||
3716 | 440 | # Failed legitimate response, results in FailDispatchResult and | ||
3717 | 441 | # failure_count on the job and the builder are both incremented. | ||
3718 | 442 | slave, bob_builder = self._setUpSlaveAndBuilder( | ||
3719 | 443 | builder_failure_count=0, job_failure_count=0) | ||
3720 | 444 | |||
3721 | 445 | failed_response = [False, 'uncool builder'] | ||
3722 | 446 | result = self.manager.checkDispatch( | ||
3723 | 447 | failed_response, 'ensurepresent', slave) | ||
3724 | 448 | self.assertIsDispatchFail(result) | ||
3725 | 449 | self.assertEqual( | ||
3726 | 450 | repr(result), | ||
3727 | 451 | '<bob:%s> failure (uncool builder)' % self.fake_builder_url) | ||
3728 | 452 | self.assertEqual(1, bob_builder.failure_count) | ||
3729 | 453 | self.assertEqual( | ||
3730 | 454 | 1, bob_builder.getCurrentBuildFarmJob().failure_count) | ||
3731 | 455 | |||
3732 | 456 | def test_checkDispatch_second_reset_fail_by_builder(self): | ||
3733 | 457 | # Twisted Failure response, results in a `FailDispatchResult`. | ||
3734 | 458 | slave, bob_builder = self._setUpSlaveAndBuilder( | ||
3735 | 459 | builder_failure_count=1, job_failure_count=0) | ||
3736 | 460 | |||
3737 | 461 | twisted_failure = Failure(ConnectionClosed('Boom!')) | ||
3738 | 462 | result = self.manager.checkDispatch( | ||
3739 | 463 | twisted_failure, 'ensurepresent', slave) | ||
3740 | 464 | self.assertIsDispatchFail(result) | ||
3741 | 465 | self.assertEqual( | ||
3742 | 466 | '<bob:%s> failure (None)' % self.fake_builder_url, repr(result)) | ||
3743 | 467 | self.assertEqual(2, bob_builder.failure_count) | ||
3744 | 468 | self.assertEqual( | ||
3745 | 469 | 1, bob_builder.getCurrentBuildFarmJob().failure_count) | ||
3746 | 470 | |||
3747 | 471 | def test_checkDispatch_second_comms_fail_by_builder(self): | ||
3748 | 472 | # Unexpected response, results in a `FailDispatchResult`. | ||
3749 | 473 | slave, bob_builder = self._setUpSlaveAndBuilder( | ||
3750 | 474 | builder_failure_count=1, job_failure_count=0) | ||
3751 | 475 | |||
3752 | 476 | unexpected_response = [1, 2, 3] | ||
3753 | 477 | result = self.manager.checkDispatch( | ||
3754 | 478 | unexpected_response, 'build', slave) | ||
3755 | 479 | self.assertIsDispatchFail(result) | ||
3756 | 480 | self.assertEqual( | ||
3757 | 481 | '<bob:%s> failure ' | ||
3758 | 482 | '(Unexpected response: [1, 2, 3])' % self.fake_builder_url, | ||
3759 | 483 | repr(result)) | ||
3760 | 484 | self.assertEqual(2, bob_builder.failure_count) | ||
3761 | 485 | self.assertEqual( | ||
3762 | 486 | 1, bob_builder.getCurrentBuildFarmJob().failure_count) | ||
3763 | 487 | |||
3764 | 488 | def test_checkDispatch_second_comms_fail_by_job(self): | ||
3765 | 489 | # Unknown method was given, results in a `FailDispatchResult`. | ||
3766 | 490 | # This could be caused by a faulty job which would fail the job. | ||
3767 | 491 | slave, bob_builder = self._setUpSlaveAndBuilder( | ||
3768 | 492 | builder_failure_count=0, job_failure_count=1) | ||
3769 | 493 | |||
3770 | 494 | successful_response = [ | ||
3771 | 495 | buildd_success_result_map.get('ensurepresent'), 'cool builder'] | ||
3772 | 496 | result = self.manager.checkDispatch( | ||
3773 | 497 | successful_response, 'unknown-method', slave) | ||
3774 | 498 | self.assertIsDispatchFail(result) | ||
3775 | 499 | self.assertEqual( | ||
3776 | 500 | '<bob:%s> failure ' | ||
3777 | 501 | '(Unknown slave method: unknown-method)' % self.fake_builder_url, | ||
3778 | 502 | repr(result)) | ||
3779 | 503 | self.assertEqual(1, bob_builder.failure_count) | ||
3780 | 504 | self.assertEqual( | ||
3781 | 505 | 2, bob_builder.getCurrentBuildFarmJob().failure_count) | ||
3782 | 506 | |||
3783 | 507 | def test_initiateDispatch(self): | ||
3784 | 508 | """Check `dispatchBuild` in various scenarios. | ||
3785 | 509 | |||
3786 | 510 | When there are no recording slaves (i.e. no build got dispatched | ||
3787 | 511 | in scan()) it simply finishes the cycle. | ||
3788 | 512 | |||
3789 | 513 | When there is a recording slave with pending slave calls, they are | ||
3790 | 514 | performed and if they all succeed the cycle is finished with no | ||
3791 | 515 | errors. | ||
3792 | 516 | |||
3793 | 517 | On slave call failure the chain is stopped immediately and an | ||
3794 | 518 | FailDispatchResult is collected while finishing the cycle. | ||
3795 | 519 | """ | ||
3796 | 520 | def check_no_events(results): | ||
3797 | 521 | errors = [ | ||
3798 | 522 | r for s, r in results if isinstance(r, BaseDispatchResult)] | ||
3799 | 523 | self.assertEqual(0, len(errors)) | ||
3800 | 524 | |||
3801 | 525 | def check_events(results): | ||
3802 | 526 | [error] = [r for s, r in results if r is not None] | ||
3803 | 527 | self.assertEqual( | ||
3804 | 528 | '<bob:%s> failure (very broken slave)' | ||
3805 | 529 | % self.fake_builder_url, | ||
3806 | 530 | repr(error)) | ||
3807 | 531 | self.assertTrue(error.processed) | ||
3808 | 532 | |||
3809 | 533 | def _wait_on_deferreds_then_check_no_events(): | ||
3810 | 534 | dl = self._realslaveConversationEnded() | ||
3811 | 535 | dl.addCallback(check_no_events) | ||
3812 | 536 | |||
3813 | 537 | def _wait_on_deferreds_then_check_events(): | ||
3814 | 538 | dl = self._realslaveConversationEnded() | ||
3815 | 539 | dl.addCallback(check_events) | ||
3816 | 540 | |||
3817 | 541 | # A functional slave charged with some interactions. | ||
3818 | 542 | slave = RecordingSlave( | ||
3819 | 543 | BOB_THE_BUILDER_NAME, self.fake_builder_url, | ||
3820 | 544 | self.fake_builder_host) | ||
3821 | 545 | slave.ensurepresent('arg1', 'arg2', 'arg3') | ||
3822 | 546 | slave.build('arg1', 'arg2', 'arg3') | ||
3823 | 547 | |||
3824 | 548 | # If the previous step (resuming) has failed nothing gets dispatched. | ||
3825 | 549 | reset_result = ResetDispatchResult(slave) | ||
3826 | 550 | result = self.manager.initiateDispatch(reset_result, slave) | ||
3827 | 551 | self.assertTrue(result is reset_result) | ||
3828 | 552 | self.assertFalse(slave.resume_requested) | ||
3829 | 553 | self.assertEqual(0, len(self.manager._deferred_list)) | ||
3830 | 554 | |||
3831 | 555 | # Operation with the default (funcional slave), no resets or | ||
3832 | 556 | # failures results are triggered. | ||
3833 | 557 | slave.resume() | ||
3834 | 558 | result = self.manager.initiateDispatch(None, slave) | ||
3835 | 559 | self.assertEqual(None, result) | ||
3836 | 560 | self.assertTrue(slave.resume_requested) | ||
3837 | 561 | self.assertEqual( | ||
3838 | 562 | [('ensurepresent', 'arg1', 'arg2', 'arg3'), | ||
3839 | 563 | ('build', 'arg1', 'arg2', 'arg3')], | ||
3840 | 564 | self.test_proxy.calls) | ||
3841 | 565 | self.assertEqual(2, len(self.manager._deferred_list)) | ||
3842 | 566 | |||
3843 | 567 | # Monkey patch the slaveConversationEnded method so we can chain a | ||
3844 | 568 | # callback to check the end of the result chain. | ||
3845 | 569 | self.manager.slaveConversationEnded = \ | ||
3846 | 570 | _wait_on_deferreds_then_check_no_events | ||
3847 | 571 | events = self.manager.slaveConversationEnded() | ||
3848 | 572 | |||
3849 | 573 | # Create a broken slave and insert interaction that will | ||
3850 | 574 | # cause the builder to be marked as fail. | ||
3851 | 575 | self.test_proxy = TestingXMLRPCProxy('very broken slave') | ||
3852 | 576 | slave = RecordingSlave( | ||
3853 | 577 | BOB_THE_BUILDER_NAME, self.fake_builder_url, | ||
3854 | 578 | self.fake_builder_host) | ||
3855 | 579 | slave.ensurepresent('arg1', 'arg2', 'arg3') | ||
3856 | 580 | slave.build('arg1', 'arg2', 'arg3') | ||
3857 | 581 | |||
3858 | 582 | result = self.manager.initiateDispatch(None, slave) | ||
3859 | 583 | self.assertEqual(None, result) | ||
3860 | 584 | self.assertEqual(3, len(self.manager._deferred_list)) | ||
3861 | 585 | self.assertEqual( | ||
3862 | 586 | [('ensurepresent', 'arg1', 'arg2', 'arg3')], | ||
3863 | 587 | self.test_proxy.calls) | ||
3864 | 588 | |||
3865 | 589 | # Monkey patch the slaveConversationEnded method so we can chain a | ||
3866 | 590 | # callback to check the end of the result chain. | ||
3867 | 591 | self.manager.slaveConversationEnded = \ | ||
3868 | 592 | _wait_on_deferreds_then_check_events | ||
3869 | 593 | events = self.manager.slaveConversationEnded() | ||
3870 | 594 | |||
3871 | 595 | return events | ||
3872 | 596 | |||
3873 | 597 | |||
3874 | 65 | class TestSlaveScannerScan(TrialTestCase): | 598 | class TestSlaveScannerScan(TrialTestCase): |
3875 | 66 | """Tests `SlaveScanner.scan` method. | 599 | """Tests `SlaveScanner.scan` method. |
3876 | 67 | 600 | ||
3877 | 68 | This method uses the old framework for scanning and dispatching builds. | 601 | This method uses the old framework for scanning and dispatching builds. |
3878 | 69 | """ | 602 | """ |
3880 | 70 | layer = TwistedLaunchpadZopelessLayer | 603 | layer = LaunchpadZopelessLayer |
3881 | 71 | 604 | ||
3882 | 72 | def setUp(self): | 605 | def setUp(self): |
3883 | 73 | """Setup TwistedLayer, TrialTestCase and BuilddSlaveTest. | 606 | """Setup TwistedLayer, TrialTestCase and BuilddSlaveTest. |
3884 | @@ -75,18 +608,19 @@ | |||
3885 | 75 | Also adjust the sampledata in a way a build can be dispatched to | 608 | Also adjust the sampledata in a way a build can be dispatched to |
3886 | 76 | 'bob' builder. | 609 | 'bob' builder. |
3887 | 77 | """ | 610 | """ |
3888 | 78 | from lp.soyuz.tests.test_publishing import SoyuzTestPublisher | ||
3889 | 79 | TwistedLayer.testSetUp() | 611 | TwistedLayer.testSetUp() |
3890 | 80 | TrialTestCase.setUp(self) | 612 | TrialTestCase.setUp(self) |
3891 | 81 | self.slave = BuilddSlaveTestSetup() | 613 | self.slave = BuilddSlaveTestSetup() |
3892 | 82 | self.slave.setUp() | 614 | self.slave.setUp() |
3893 | 83 | 615 | ||
3894 | 84 | # Creating the required chroots needed for dispatching. | 616 | # Creating the required chroots needed for dispatching. |
3895 | 617 | login('foo.bar@canonical.com') | ||
3896 | 85 | test_publisher = SoyuzTestPublisher() | 618 | test_publisher = SoyuzTestPublisher() |
3897 | 86 | ubuntu = getUtility(IDistributionSet).getByName('ubuntu') | 619 | ubuntu = getUtility(IDistributionSet).getByName('ubuntu') |
3898 | 87 | hoary = ubuntu.getSeries('hoary') | 620 | hoary = ubuntu.getSeries('hoary') |
3899 | 88 | test_publisher.setUpDefaultDistroSeries(hoary) | 621 | test_publisher.setUpDefaultDistroSeries(hoary) |
3900 | 89 | test_publisher.addFakeChroots() | 622 | test_publisher.addFakeChroots() |
3901 | 623 | login(ANONYMOUS) | ||
3902 | 90 | 624 | ||
3903 | 91 | def tearDown(self): | 625 | def tearDown(self): |
3904 | 92 | self.slave.tearDown() | 626 | self.slave.tearDown() |
3905 | @@ -94,7 +628,8 @@ | |||
3906 | 94 | TwistedLayer.testTearDown() | 628 | TwistedLayer.testTearDown() |
3907 | 95 | 629 | ||
3908 | 96 | def _resetBuilder(self, builder): | 630 | def _resetBuilder(self, builder): |
3910 | 97 | """Reset the given builder and its job.""" | 631 | """Reset the given builder and it's job.""" |
3911 | 632 | login('foo.bar@canonical.com') | ||
3912 | 98 | 633 | ||
3913 | 99 | builder.builderok = True | 634 | builder.builderok = True |
3914 | 100 | job = builder.currentjob | 635 | job = builder.currentjob |
3915 | @@ -102,6 +637,7 @@ | |||
3916 | 102 | job.reset() | 637 | job.reset() |
3917 | 103 | 638 | ||
3918 | 104 | transaction.commit() | 639 | transaction.commit() |
3919 | 640 | login(ANONYMOUS) | ||
3920 | 105 | 641 | ||
3921 | 106 | def assertBuildingJob(self, job, builder, logtail=None): | 642 | def assertBuildingJob(self, job, builder, logtail=None): |
3922 | 107 | """Assert the given job is building on the given builder.""" | 643 | """Assert the given job is building on the given builder.""" |
3923 | @@ -117,25 +653,55 @@ | |||
3924 | 117 | self.assertEqual(build.status, BuildStatus.BUILDING) | 653 | self.assertEqual(build.status, BuildStatus.BUILDING) |
3925 | 118 | self.assertEqual(job.logtail, logtail) | 654 | self.assertEqual(job.logtail, logtail) |
3926 | 119 | 655 | ||
3928 | 120 | def _getScanner(self, builder_name=None): | 656 | def _getManager(self): |
3929 | 121 | """Instantiate a SlaveScanner object. | 657 | """Instantiate a SlaveScanner object. |
3930 | 122 | 658 | ||
3931 | 123 | Replace its default logging handler by a testing version. | 659 | Replace its default logging handler by a testing version. |
3932 | 124 | """ | 660 | """ |
3937 | 125 | if builder_name is None: | 661 | manager = SlaveScanner(BOB_THE_BUILDER_NAME, BufferLogger()) |
3938 | 126 | builder_name = BOB_THE_BUILDER_NAME | 662 | manager.logger.name = 'slave-scanner' |
3935 | 127 | scanner = SlaveScanner(builder_name, QuietFakeLogger()) | ||
3936 | 128 | scanner.logger.name = 'slave-scanner' | ||
3939 | 129 | 663 | ||
3941 | 130 | return scanner | 664 | return manager |
3942 | 131 | 665 | ||
3943 | 132 | def _checkDispatch(self, slave, builder): | 666 | def _checkDispatch(self, slave, builder): |
3948 | 133 | # SlaveScanner.scan returns a slave when a dispatch was | 667 | """`SlaveScanner.scan` returns a `RecordingSlave`. |
3949 | 134 | # successful. We also check that the builder has a job on it. | 668 | |
3950 | 135 | 669 | The single slave returned should match the given builder and | |
3951 | 136 | self.assertTrue(slave is not None, "Expected a slave.") | 670 | contain interactions that should be performed asynchronously for |
3952 | 671 | properly dispatching the sampledata job. | ||
3953 | 672 | """ | ||
3954 | 673 | self.assertFalse( | ||
3955 | 674 | slave is None, "Unexpected recording_slaves.") | ||
3956 | 675 | |||
3957 | 676 | self.assertEqual(slave.name, builder.name) | ||
3958 | 677 | self.assertEqual(slave.url, builder.url) | ||
3959 | 678 | self.assertEqual(slave.vm_host, builder.vm_host) | ||
3960 | 137 | self.assertEqual(0, builder.failure_count) | 679 | self.assertEqual(0, builder.failure_count) |
3962 | 138 | self.assertTrue(builder.currentjob is not None) | 680 | |
3963 | 681 | self.assertEqual( | ||
3964 | 682 | [('ensurepresent', | ||
3965 | 683 | ('0feca720e2c29dafb2c900713ba560e03b758711', | ||
3966 | 684 | 'http://localhost:58000/93/fake_chroot.tar.gz', | ||
3967 | 685 | '', '')), | ||
3968 | 686 | ('ensurepresent', | ||
3969 | 687 | ('4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33', | ||
3970 | 688 | 'http://localhost:58000/43/alsa-utils_1.0.9a-4ubuntu1.dsc', | ||
3971 | 689 | '', '')), | ||
3972 | 690 | ('build', | ||
3973 | 691 | ('6358a89e2215e19b02bf91e2e4d009640fae5cf8', | ||
3974 | 692 | 'binarypackage', '0feca720e2c29dafb2c900713ba560e03b758711', | ||
3975 | 693 | {'alsa-utils_1.0.9a-4ubuntu1.dsc': | ||
3976 | 694 | '4e3961baf4f56fdbc95d0dd47f3c5bc275da8a33'}, | ||
3977 | 695 | {'arch_indep': True, | ||
3978 | 696 | 'arch_tag': 'i386', | ||
3979 | 697 | 'archive_private': False, | ||
3980 | 698 | 'archive_purpose': 'PRIMARY', | ||
3981 | 699 | 'archives': | ||
3982 | 700 | ['deb http://ftpmaster.internal/ubuntu hoary main'], | ||
3983 | 701 | 'build_debug_symbols': False, | ||
3984 | 702 | 'ogrecomponent': 'main', | ||
3985 | 703 | 'suite': u'hoary'}))], | ||
3986 | 704 | slave.calls, "Job was not properly dispatched.") | ||
3987 | 139 | 705 | ||
3988 | 140 | def testScanDispatchForResetBuilder(self): | 706 | def testScanDispatchForResetBuilder(self): |
3989 | 141 | # A job gets dispatched to the sampledata builder after it's reset. | 707 | # A job gets dispatched to the sampledata builder after it's reset. |
3990 | @@ -143,27 +709,26 @@ | |||
3991 | 143 | # Reset sampledata builder. | 709 | # Reset sampledata builder. |
3992 | 144 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | 710 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] |
3993 | 145 | self._resetBuilder(builder) | 711 | self._resetBuilder(builder) |
3994 | 146 | builder.setSlaveForTesting(OkSlave()) | ||
3995 | 147 | # Set this to 1 here so that _checkDispatch can make sure it's | 712 | # Set this to 1 here so that _checkDispatch can make sure it's |
3996 | 148 | # reset to 0 after a successful dispatch. | 713 | # reset to 0 after a successful dispatch. |
3997 | 149 | builder.failure_count = 1 | 714 | builder.failure_count = 1 |
3998 | 150 | 715 | ||
3999 | 151 | # Run 'scan' and check its result. | 716 | # Run 'scan' and check its result. |
4004 | 152 | self.layer.txn.commit() | 717 | LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser) |
4005 | 153 | self.layer.switchDbUser(config.builddmaster.dbuser) | 718 | manager = self._getManager() |
4006 | 154 | scanner = self._getScanner() | 719 | d = defer.maybeDeferred(manager.scan) |
4003 | 155 | d = defer.maybeDeferred(scanner.scan) | ||
4007 | 156 | d.addCallback(self._checkDispatch, builder) | 720 | d.addCallback(self._checkDispatch, builder) |
4008 | 157 | return d | 721 | return d |
4009 | 158 | 722 | ||
4011 | 159 | def _checkNoDispatch(self, slave, builder): | 723 | def _checkNoDispatch(self, recording_slave, builder): |
4012 | 160 | """Assert that no dispatch has occurred. | 724 | """Assert that no dispatch has occurred. |
4013 | 161 | 725 | ||
4015 | 162 | 'slave' is None, so no interations would be passed | 726 | 'recording_slave' is None, so no interations would be passed |
4016 | 163 | to the asynchonous dispatcher and the builder remained active | 727 | to the asynchonous dispatcher and the builder remained active |
4017 | 164 | and IDLE. | 728 | and IDLE. |
4018 | 165 | """ | 729 | """ |
4020 | 166 | self.assertTrue(slave is None, "Unexpected slave.") | 730 | self.assertTrue( |
4021 | 731 | recording_slave is None, "Unexpected recording_slave.") | ||
4022 | 167 | 732 | ||
4023 | 168 | builder = getUtility(IBuilderSet).get(builder.id) | 733 | builder = getUtility(IBuilderSet).get(builder.id) |
4024 | 169 | self.assertTrue(builder.builderok) | 734 | self.assertTrue(builder.builderok) |
4025 | @@ -188,9 +753,9 @@ | |||
4026 | 188 | login(ANONYMOUS) | 753 | login(ANONYMOUS) |
4027 | 189 | 754 | ||
4028 | 190 | # Run 'scan' and check its result. | 755 | # Run 'scan' and check its result. |
4032 | 191 | self.layer.switchDbUser(config.builddmaster.dbuser) | 756 | LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser) |
4033 | 192 | scanner = self._getScanner() | 757 | manager = self._getManager() |
4034 | 193 | d = defer.maybeDeferred(scanner.singleCycle) | 758 | d = defer.maybeDeferred(manager.scan) |
4035 | 194 | d.addCallback(self._checkNoDispatch, builder) | 759 | d.addCallback(self._checkNoDispatch, builder) |
4036 | 195 | return d | 760 | return d |
4037 | 196 | 761 | ||
4038 | @@ -228,9 +793,9 @@ | |||
4039 | 228 | login(ANONYMOUS) | 793 | login(ANONYMOUS) |
4040 | 229 | 794 | ||
4041 | 230 | # Run 'scan' and check its result. | 795 | # Run 'scan' and check its result. |
4045 | 231 | self.layer.switchDbUser(config.builddmaster.dbuser) | 796 | LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser) |
4046 | 232 | scanner = self._getScanner() | 797 | manager = self._getManager() |
4047 | 233 | d = defer.maybeDeferred(scanner.scan) | 798 | d = defer.maybeDeferred(manager.scan) |
4048 | 234 | d.addCallback(self._checkJobRescued, builder, job) | 799 | d.addCallback(self._checkJobRescued, builder, job) |
4049 | 235 | return d | 800 | return d |
4050 | 236 | 801 | ||
4051 | @@ -249,6 +814,8 @@ | |||
4052 | 249 | self.assertBuildingJob(job, builder, logtail='This is a build log') | 814 | self.assertBuildingJob(job, builder, logtail='This is a build log') |
4053 | 250 | 815 | ||
4054 | 251 | def testScanUpdatesBuildingJobs(self): | 816 | def testScanUpdatesBuildingJobs(self): |
4055 | 817 | # The job assigned to a broken builder is rescued. | ||
4056 | 818 | |||
4057 | 252 | # Enable sampledata builder attached to an appropriate testing | 819 | # Enable sampledata builder attached to an appropriate testing |
4058 | 253 | # slave. It will respond as if it was building the sampledata job. | 820 | # slave. It will respond as if it was building the sampledata job. |
4059 | 254 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | 821 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] |
4060 | @@ -263,174 +830,188 @@ | |||
4061 | 263 | self.assertBuildingJob(job, builder) | 830 | self.assertBuildingJob(job, builder) |
4062 | 264 | 831 | ||
4063 | 265 | # Run 'scan' and check its result. | 832 | # Run 'scan' and check its result. |
4067 | 266 | self.layer.switchDbUser(config.builddmaster.dbuser) | 833 | LaunchpadZopelessLayer.switchDbUser(config.builddmaster.dbuser) |
4068 | 267 | scanner = self._getScanner() | 834 | manager = self._getManager() |
4069 | 268 | d = defer.maybeDeferred(scanner.scan) | 835 | d = defer.maybeDeferred(manager.scan) |
4070 | 269 | d.addCallback(self._checkJobUpdated, builder, job) | 836 | d.addCallback(self._checkJobUpdated, builder, job) |
4071 | 270 | return d | 837 | return d |
4072 | 271 | 838 | ||
4116 | 272 | def test_scan_with_nothing_to_dispatch(self): | 839 | def test_scan_assesses_failure_exceptions(self): |
4074 | 273 | factory = LaunchpadObjectFactory() | ||
4075 | 274 | builder = factory.makeBuilder() | ||
4076 | 275 | builder.setSlaveForTesting(OkSlave()) | ||
4077 | 276 | scanner = self._getScanner(builder_name=builder.name) | ||
4078 | 277 | d = scanner.scan() | ||
4079 | 278 | return d.addCallback(self._checkNoDispatch, builder) | ||
4080 | 279 | |||
4081 | 280 | def test_scan_with_manual_builder(self): | ||
4082 | 281 | # Reset sampledata builder. | ||
4083 | 282 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | ||
4084 | 283 | self._resetBuilder(builder) | ||
4085 | 284 | builder.setSlaveForTesting(OkSlave()) | ||
4086 | 285 | builder.manual = True | ||
4087 | 286 | scanner = self._getScanner() | ||
4088 | 287 | d = scanner.scan() | ||
4089 | 288 | d.addCallback(self._checkNoDispatch, builder) | ||
4090 | 289 | return d | ||
4091 | 290 | |||
4092 | 291 | def test_scan_with_not_ok_builder(self): | ||
4093 | 292 | # Reset sampledata builder. | ||
4094 | 293 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | ||
4095 | 294 | self._resetBuilder(builder) | ||
4096 | 295 | builder.setSlaveForTesting(OkSlave()) | ||
4097 | 296 | builder.builderok = False | ||
4098 | 297 | scanner = self._getScanner() | ||
4099 | 298 | d = scanner.scan() | ||
4100 | 299 | # Because the builder is not ok, we can't use _checkNoDispatch. | ||
4101 | 300 | d.addCallback( | ||
4102 | 301 | lambda ignored: self.assertIdentical(None, builder.currentjob)) | ||
4103 | 302 | return d | ||
4104 | 303 | |||
4105 | 304 | def test_scan_of_broken_slave(self): | ||
4106 | 305 | builder = getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME] | ||
4107 | 306 | self._resetBuilder(builder) | ||
4108 | 307 | builder.setSlaveForTesting(BrokenSlave()) | ||
4109 | 308 | builder.failure_count = 0 | ||
4110 | 309 | scanner = self._getScanner(builder_name=builder.name) | ||
4111 | 310 | d = scanner.scan() | ||
4112 | 311 | return self.assertFailure(d, xmlrpclib.Fault) | ||
4113 | 312 | |||
4114 | 313 | def _assertFailureCounting(self, builder_count, job_count, | ||
4115 | 314 | expected_builder_count, expected_job_count): | ||
4117 | 315 | # If scan() fails with an exception, failure_counts should be | 840 | # If scan() fails with an exception, failure_counts should be |
4121 | 316 | # incremented. What we do with the results of the failure | 841 | # incremented and tested. |
4119 | 317 | # counts is tested below separately, this test just makes sure that | ||
4120 | 318 | # scan() is setting the counts. | ||
4122 | 319 | def failing_scan(): | 842 | def failing_scan(): |
4126 | 320 | return defer.fail(Exception("fake exception")) | 843 | raise Exception("fake exception") |
4127 | 321 | scanner = self._getScanner() | 844 | manager = self._getManager() |
4128 | 322 | scanner.scan = failing_scan | 845 | manager.scan = failing_scan |
4129 | 846 | manager.scheduleNextScanCycle = FakeMethod() | ||
4130 | 323 | from lp.buildmaster import manager as manager_module | 847 | from lp.buildmaster import manager as manager_module |
4131 | 324 | self.patch(manager_module, 'assessFailureCounts', FakeMethod()) | 848 | self.patch(manager_module, 'assessFailureCounts', FakeMethod()) |
4141 | 325 | builder = getUtility(IBuilderSet)[scanner.builder_name] | 849 | builder = getUtility(IBuilderSet)[manager.builder_name] |
4142 | 326 | 850 | ||
4143 | 327 | builder.failure_count = builder_count | 851 | # Failure counts start at zero. |
4144 | 328 | builder.currentjob.specific_job.build.failure_count = job_count | 852 | self.assertEqual(0, builder.failure_count) |
4145 | 329 | # The _scanFailed() calls abort, so make sure our existing | 853 | self.assertEqual( |
4146 | 330 | # failure counts are persisted. | 854 | 0, builder.currentjob.specific_job.build.failure_count) |
4147 | 331 | self.layer.txn.commit() | 855 | |
4148 | 332 | 856 | # startCycle() calls scan() which is our fake one that throws an | |
4140 | 333 | # singleCycle() calls scan() which is our fake one that throws an | ||
4149 | 334 | # exception. | 857 | # exception. |
4151 | 335 | d = scanner.singleCycle() | 858 | manager.startCycle() |
4152 | 336 | 859 | ||
4153 | 337 | # Failure counts should be updated, and the assessment method | 860 | # Failure counts should be updated, and the assessment method |
4240 | 338 | # should have been called. The actual behaviour is tested below | 861 | # should have been called. |
4241 | 339 | # in TestFailureAssessments. | 862 | self.assertEqual(1, builder.failure_count) |
4242 | 340 | def got_scan(ignored): | 863 | self.assertEqual( |
4243 | 341 | self.assertEqual(expected_builder_count, builder.failure_count) | 864 | 1, builder.currentjob.specific_job.build.failure_count) |
4244 | 342 | self.assertEqual( | 865 | |
4245 | 343 | expected_job_count, | 866 | self.assertEqual( |
4246 | 344 | builder.currentjob.specific_job.build.failure_count) | 867 | 1, manager_module.assessFailureCounts.call_count) |
4247 | 345 | self.assertEqual( | 868 | |
4248 | 346 | 1, manager_module.assessFailureCounts.call_count) | 869 | |
4249 | 347 | 870 | class TestDispatchResult(LaunchpadTestCase): | |
4250 | 348 | return d.addCallback(got_scan) | 871 | """Tests `BaseDispatchResult` variations. |
4251 | 349 | 872 | ||
4252 | 350 | def test_scan_first_fail(self): | 873 | Variations of `BaseDispatchResult` when evaluated update the database |
4253 | 351 | # The first failure of a job should result in the failure_count | 874 | information according to their purpose. |
4254 | 352 | # on the job and the builder both being incremented. | 875 | """ |
4255 | 353 | self._assertFailureCounting( | 876 | |
4256 | 354 | builder_count=0, job_count=0, expected_builder_count=1, | 877 | layer = LaunchpadZopelessLayer |
4257 | 355 | expected_job_count=1) | 878 | |
4258 | 356 | 879 | def _getBuilder(self, name): | |
4259 | 357 | def test_scan_second_builder_fail(self): | 880 | """Return a fixed `IBuilder` instance from the sampledata. |
4260 | 358 | # The first failure of a job should result in the failure_count | 881 | |
4261 | 359 | # on the job and the builder both being incremented. | 882 | Ensure it's active (builderok=True) and it has a in-progress job. |
4262 | 360 | self._assertFailureCounting( | 883 | """ |
4263 | 361 | builder_count=1, job_count=0, expected_builder_count=2, | 884 | login('foo.bar@canonical.com') |
4264 | 362 | expected_job_count=1) | 885 | |
4265 | 363 | 886 | builder = getUtility(IBuilderSet)[name] | |
4266 | 364 | def test_scan_second_job_fail(self): | 887 | builder.builderok = True |
4267 | 365 | # The first failure of a job should result in the failure_count | 888 | |
4268 | 366 | # on the job and the builder both being incremented. | 889 | job = builder.currentjob |
4269 | 367 | self._assertFailureCounting( | 890 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job) |
4270 | 368 | builder_count=0, job_count=1, expected_builder_count=1, | 891 | self.assertEqual( |
4271 | 369 | expected_job_count=2) | 892 | 'i386 build of mozilla-firefox 0.9 in ubuntu hoary RELEASE', |
4272 | 370 | 893 | build.title) | |
4273 | 371 | def test_scanFailed_handles_lack_of_a_job_on_the_builder(self): | 894 | |
4274 | 372 | def failing_scan(): | 895 | self.assertEqual('BUILDING', build.status.name) |
4275 | 373 | return defer.fail(Exception("fake exception")) | 896 | self.assertNotEqual(None, job.builder) |
4276 | 374 | scanner = self._getScanner() | 897 | self.assertNotEqual(None, job.date_started) |
4277 | 375 | scanner.scan = failing_scan | 898 | self.assertNotEqual(None, job.logtail) |
4278 | 376 | builder = getUtility(IBuilderSet)[scanner.builder_name] | 899 | |
4279 | 377 | builder.failure_count = Builder.FAILURE_THRESHOLD | 900 | transaction.commit() |
4280 | 378 | builder.currentjob.reset() | 901 | |
4281 | 379 | self.layer.txn.commit() | 902 | return builder, job.id |
4282 | 380 | 903 | ||
4283 | 381 | d = scanner.singleCycle() | 904 | def assertBuildqueueIsClean(self, buildqueue): |
4284 | 382 | 905 | # Check that the buildqueue is reset. | |
4285 | 383 | def scan_finished(ignored): | 906 | self.assertEqual(None, buildqueue.builder) |
4286 | 384 | self.assertFalse(builder.builderok) | 907 | self.assertEqual(None, buildqueue.date_started) |
4287 | 385 | 908 | self.assertEqual(None, buildqueue.logtail) | |
4288 | 386 | return d.addCallback(scan_finished) | 909 | |
4289 | 387 | 910 | def assertBuilderIsClean(self, builder): | |
4290 | 388 | def test_fail_to_resume_slave_resets_job(self): | 911 | # Check that the builder is ready for a new build. |
4291 | 389 | # If an attempt to resume and dispatch a slave fails, it should | 912 | self.assertTrue(builder.builderok) |
4292 | 390 | # reset the job via job.reset() | 913 | self.assertIs(None, builder.failnotes) |
4293 | 391 | 914 | self.assertIs(None, builder.currentjob) | |
4294 | 392 | # Make a slave with a failing resume() method. | 915 | |
4295 | 393 | slave = OkSlave() | 916 | def testResetDispatchResult(self): |
4296 | 394 | slave.resume = lambda: deferLater( | 917 | # Test that `ResetDispatchResult` resets the builder and job. |
4297 | 395 | reactor, 0, defer.fail, Failure(('out', 'err', 1))) | 918 | builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME) |
4298 | 396 | 919 | buildqueue_id = builder.currentjob.id | |
4299 | 397 | # Reset sampledata builder. | 920 | builder.builderok = True |
4300 | 398 | builder = removeSecurityProxy( | 921 | builder.failure_count = 1 |
4301 | 399 | getUtility(IBuilderSet)[BOB_THE_BUILDER_NAME]) | 922 | |
4302 | 400 | self._resetBuilder(builder) | 923 | # Setup a interaction to satisfy 'write_transaction' decorator. |
4303 | 401 | self.assertEqual(0, builder.failure_count) | 924 | login(ANONYMOUS) |
4304 | 402 | builder.setSlaveForTesting(slave) | 925 | slave = RecordingSlave(builder.name, builder.url, builder.vm_host) |
4305 | 403 | builder.vm_host = "fake_vm_host" | 926 | result = ResetDispatchResult(slave) |
4306 | 404 | 927 | result() | |
4307 | 405 | scanner = self._getScanner() | 928 | |
4308 | 406 | 929 | buildqueue = getUtility(IBuildQueueSet).get(buildqueue_id) | |
4309 | 407 | # Get the next job that will be dispatched. | 930 | self.assertBuildqueueIsClean(buildqueue) |
4310 | 408 | job = removeSecurityProxy(builder._findBuildCandidate()) | 931 | |
4311 | 409 | job.virtualized = True | 932 | # XXX Julian |
4312 | 410 | builder.virtualized = True | 933 | # Disabled test until bug 586362 is fixed. |
4313 | 411 | d = scanner.singleCycle() | 934 | #self.assertFalse(builder.builderok) |
4314 | 412 | 935 | self.assertBuilderIsClean(builder) | |
4315 | 413 | def check(ignored): | 936 | |
4316 | 414 | # The failure_count will have been incremented on the | 937 | def testFailDispatchResult(self): |
4317 | 415 | # builder, we can check that to see that a dispatch attempt | 938 | # Test that `FailDispatchResult` calls assessFailureCounts() so |
4318 | 416 | # did indeed occur. | 939 | # that we know the builders and jobs are failed as necessary |
4319 | 417 | self.assertEqual(1, builder.failure_count) | 940 | # when a FailDispatchResult is called at the end of the dispatch |
4320 | 418 | # There should also be no builder set on the job. | 941 | # chain. |
4321 | 419 | self.assertTrue(job.builder is None) | 942 | builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME) |
4322 | 420 | build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job) | 943 | |
4323 | 421 | self.assertEqual(build.status, BuildStatus.NEEDSBUILD) | 944 | # Setup a interaction to satisfy 'write_transaction' decorator. |
4324 | 422 | 945 | login(ANONYMOUS) | |
4325 | 423 | return d.addCallback(check) | 946 | slave = RecordingSlave(builder.name, builder.url, builder.vm_host) |
4326 | 947 | result = FailDispatchResult(slave, 'does not work!') | ||
4327 | 948 | result.assessFailureCounts = FakeMethod() | ||
4328 | 949 | self.assertEqual(0, result.assessFailureCounts.call_count) | ||
4329 | 950 | result() | ||
4330 | 951 | self.assertEqual(1, result.assessFailureCounts.call_count) | ||
4331 | 952 | |||
4332 | 953 | def _setup_failing_dispatch_result(self): | ||
4333 | 954 | # assessFailureCounts should fail jobs or builders depending on | ||
4334 | 955 | # whether it sees the failure_counts on each increasing. | ||
4335 | 956 | builder, job_id = self._getBuilder(BOB_THE_BUILDER_NAME) | ||
4336 | 957 | slave = RecordingSlave(builder.name, builder.url, builder.vm_host) | ||
4337 | 958 | result = FailDispatchResult(slave, 'does not work!') | ||
4338 | 959 | return builder, result | ||
4339 | 960 | |||
4340 | 961 | def test_assessFailureCounts_equal_failures(self): | ||
4341 | 962 | # Basic case where the failure counts are equal and the job is | ||
4342 | 963 | # reset to try again & the builder is not failed. | ||
4343 | 964 | builder, result = self._setup_failing_dispatch_result() | ||
4344 | 965 | buildqueue = builder.currentjob | ||
4345 | 966 | build = buildqueue.specific_job.build | ||
4346 | 967 | builder.failure_count = 2 | ||
4347 | 968 | build.failure_count = 2 | ||
4348 | 969 | result.assessFailureCounts() | ||
4349 | 970 | |||
4350 | 971 | self.assertBuilderIsClean(builder) | ||
4351 | 972 | self.assertEqual('NEEDSBUILD', build.status.name) | ||
4352 | 973 | self.assertBuildqueueIsClean(buildqueue) | ||
4353 | 974 | |||
4354 | 975 | def test_assessFailureCounts_job_failed(self): | ||
4355 | 976 | # Case where the job has failed more than the builder. | ||
4356 | 977 | builder, result = self._setup_failing_dispatch_result() | ||
4357 | 978 | buildqueue = builder.currentjob | ||
4358 | 979 | build = buildqueue.specific_job.build | ||
4359 | 980 | build.failure_count = 2 | ||
4360 | 981 | builder.failure_count = 1 | ||
4361 | 982 | result.assessFailureCounts() | ||
4362 | 983 | |||
4363 | 984 | self.assertBuilderIsClean(builder) | ||
4364 | 985 | self.assertEqual('FAILEDTOBUILD', build.status.name) | ||
4365 | 986 | # The buildqueue should have been removed entirely. | ||
4366 | 987 | self.assertEqual( | ||
4367 | 988 | None, getUtility(IBuildQueueSet).getByBuilder(builder), | ||
4368 | 989 | "Buildqueue was not removed when it should be.") | ||
4369 | 990 | |||
4370 | 991 | def test_assessFailureCounts_builder_failed(self): | ||
4371 | 992 | # Case where the builder has failed more than the job. | ||
4372 | 993 | builder, result = self._setup_failing_dispatch_result() | ||
4373 | 994 | buildqueue = builder.currentjob | ||
4374 | 995 | build = buildqueue.specific_job.build | ||
4375 | 996 | build.failure_count = 2 | ||
4376 | 997 | builder.failure_count = 3 | ||
4377 | 998 | result.assessFailureCounts() | ||
4378 | 999 | |||
4379 | 1000 | self.assertFalse(builder.builderok) | ||
4380 | 1001 | self.assertEqual('does not work!', builder.failnotes) | ||
4381 | 1002 | self.assertTrue(builder.currentjob is None) | ||
4382 | 1003 | self.assertEqual('NEEDSBUILD', build.status.name) | ||
4383 | 1004 | self.assertBuildqueueIsClean(buildqueue) | ||
4384 | 424 | 1005 | ||
4385 | 425 | 1006 | ||
4386 | 426 | class TestBuilddManager(TrialTestCase): | 1007 | class TestBuilddManager(TrialTestCase): |
4387 | 427 | 1008 | ||
4389 | 428 | layer = TwistedLaunchpadZopelessLayer | 1009 | layer = LaunchpadZopelessLayer |
4390 | 429 | 1010 | ||
4391 | 430 | def _stub_out_scheduleNextScanCycle(self): | 1011 | def _stub_out_scheduleNextScanCycle(self): |
4392 | 431 | # stub out the code that adds a callLater, so that later tests | 1012 | # stub out the code that adds a callLater, so that later tests |
4393 | 432 | # don't get surprises. | 1013 | # don't get surprises. |
4395 | 433 | self.patch(SlaveScanner, 'startCycle', FakeMethod()) | 1014 | self.patch(SlaveScanner, 'scheduleNextScanCycle', FakeMethod()) |
4396 | 434 | 1015 | ||
4397 | 435 | def test_addScanForBuilders(self): | 1016 | def test_addScanForBuilders(self): |
4398 | 436 | # Test that addScanForBuilders generates NewBuildersScanner objects. | 1017 | # Test that addScanForBuilders generates NewBuildersScanner objects. |
4399 | @@ -459,62 +1040,10 @@ | |||
4400 | 459 | self.assertNotEqual(0, manager.new_builders_scanner.scan.call_count) | 1040 | self.assertNotEqual(0, manager.new_builders_scanner.scan.call_count) |
4401 | 460 | 1041 | ||
4402 | 461 | 1042 | ||
4403 | 462 | class TestFailureAssessments(TestCaseWithFactory): | ||
4404 | 463 | |||
4405 | 464 | layer = ZopelessDatabaseLayer | ||
4406 | 465 | |||
4407 | 466 | def setUp(self): | ||
4408 | 467 | TestCaseWithFactory.setUp(self) | ||
4409 | 468 | self.builder = self.factory.makeBuilder() | ||
4410 | 469 | self.build = self.factory.makeSourcePackageRecipeBuild() | ||
4411 | 470 | self.buildqueue = self.build.queueBuild() | ||
4412 | 471 | self.buildqueue.markAsBuilding(self.builder) | ||
4413 | 472 | |||
4414 | 473 | def test_equal_failures_reset_job(self): | ||
4415 | 474 | self.builder.gotFailure() | ||
4416 | 475 | self.builder.getCurrentBuildFarmJob().gotFailure() | ||
4417 | 476 | |||
4418 | 477 | assessFailureCounts(self.builder, "failnotes") | ||
4419 | 478 | self.assertIs(None, self.builder.currentjob) | ||
4420 | 479 | self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD) | ||
4421 | 480 | |||
4422 | 481 | def test_job_failing_more_than_builder_fails_job(self): | ||
4423 | 482 | self.builder.getCurrentBuildFarmJob().gotFailure() | ||
4424 | 483 | |||
4425 | 484 | assessFailureCounts(self.builder, "failnotes") | ||
4426 | 485 | self.assertIs(None, self.builder.currentjob) | ||
4427 | 486 | self.assertEqual(self.build.status, BuildStatus.FAILEDTOBUILD) | ||
4428 | 487 | |||
4429 | 488 | def test_builder_failing_more_than_job_but_under_fail_threshold(self): | ||
4430 | 489 | self.builder.failure_count = Builder.FAILURE_THRESHOLD - 1 | ||
4431 | 490 | |||
4432 | 491 | assessFailureCounts(self.builder, "failnotes") | ||
4433 | 492 | self.assertIs(None, self.builder.currentjob) | ||
4434 | 493 | self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD) | ||
4435 | 494 | self.assertTrue(self.builder.builderok) | ||
4436 | 495 | |||
4437 | 496 | def test_builder_failing_more_than_job_but_over_fail_threshold(self): | ||
4438 | 497 | self.builder.failure_count = Builder.FAILURE_THRESHOLD | ||
4439 | 498 | |||
4440 | 499 | assessFailureCounts(self.builder, "failnotes") | ||
4441 | 500 | self.assertIs(None, self.builder.currentjob) | ||
4442 | 501 | self.assertEqual(self.build.status, BuildStatus.NEEDSBUILD) | ||
4443 | 502 | self.assertFalse(self.builder.builderok) | ||
4444 | 503 | self.assertEqual("failnotes", self.builder.failnotes) | ||
4445 | 504 | |||
4446 | 505 | def test_builder_failing_with_no_attached_job(self): | ||
4447 | 506 | self.buildqueue.reset() | ||
4448 | 507 | self.builder.failure_count = Builder.FAILURE_THRESHOLD | ||
4449 | 508 | |||
4450 | 509 | assessFailureCounts(self.builder, "failnotes") | ||
4451 | 510 | self.assertFalse(self.builder.builderok) | ||
4452 | 511 | self.assertEqual("failnotes", self.builder.failnotes) | ||
4453 | 512 | |||
4454 | 513 | |||
4455 | 514 | class TestNewBuilders(TrialTestCase): | 1043 | class TestNewBuilders(TrialTestCase): |
4456 | 515 | """Test detecting of new builders.""" | 1044 | """Test detecting of new builders.""" |
4457 | 516 | 1045 | ||
4459 | 517 | layer = TwistedLaunchpadZopelessLayer | 1046 | layer = LaunchpadZopelessLayer |
4460 | 518 | 1047 | ||
4461 | 519 | def _getScanner(self, manager=None, clock=None): | 1048 | def _getScanner(self, manager=None, clock=None): |
4462 | 520 | return NewBuildersScanner(manager=manager, clock=clock) | 1049 | return NewBuildersScanner(manager=manager, clock=clock) |
4463 | @@ -555,8 +1084,11 @@ | |||
4464 | 555 | new_builders, builder_scanner.checkForNewBuilders()) | 1084 | new_builders, builder_scanner.checkForNewBuilders()) |
4465 | 556 | 1085 | ||
4466 | 557 | def test_scan(self): | 1086 | def test_scan(self): |
4468 | 558 | # See if scan detects new builders. | 1087 | # See if scan detects new builders and schedules the next scan. |
4469 | 559 | 1088 | ||
4470 | 1089 | # stub out the addScanForBuilders and scheduleScan methods since | ||
4471 | 1090 | # they use callLater; we only want to assert that they get | ||
4472 | 1091 | # called. | ||
4473 | 560 | def fake_checkForNewBuilders(): | 1092 | def fake_checkForNewBuilders(): |
4474 | 561 | return "new_builders" | 1093 | return "new_builders" |
4475 | 562 | 1094 | ||
4476 | @@ -572,6 +1104,9 @@ | |||
4477 | 572 | builder_scanner.scan() | 1104 | builder_scanner.scan() |
4478 | 573 | advance = NewBuildersScanner.SCAN_INTERVAL + 1 | 1105 | advance = NewBuildersScanner.SCAN_INTERVAL + 1 |
4479 | 574 | clock.advance(advance) | 1106 | clock.advance(advance) |
4480 | 1107 | self.assertNotEqual( | ||
4481 | 1108 | 0, builder_scanner.scheduleScan.call_count, | ||
4482 | 1109 | "scheduleScan did not get called") | ||
4483 | 575 | 1110 | ||
4484 | 576 | 1111 | ||
4485 | 577 | def is_file_growing(filepath, poll_interval=1, poll_repeat=10): | 1112 | def is_file_growing(filepath, poll_interval=1, poll_repeat=10): |
4486 | @@ -612,7 +1147,7 @@ | |||
4487 | 612 | return False | 1147 | return False |
4488 | 613 | 1148 | ||
4489 | 614 | 1149 | ||
4491 | 615 | class TestBuilddManagerScript(TestCaseWithFactory): | 1150 | class TestBuilddManagerScript(LaunchpadTestCase): |
4492 | 616 | 1151 | ||
4493 | 617 | layer = LaunchpadScriptLayer | 1152 | layer = LaunchpadScriptLayer |
4494 | 618 | 1153 | ||
4495 | @@ -621,7 +1156,6 @@ | |||
4496 | 621 | fixture = BuilddManagerTestSetup() | 1156 | fixture = BuilddManagerTestSetup() |
4497 | 622 | fixture.setUp() | 1157 | fixture.setUp() |
4498 | 623 | fixture.tearDown() | 1158 | fixture.tearDown() |
4499 | 624 | self.layer.force_dirty_database() | ||
4500 | 625 | 1159 | ||
4501 | 626 | # XXX Julian 2010-08-06 bug=614275 | 1160 | # XXX Julian 2010-08-06 bug=614275 |
4502 | 627 | # These next 2 tests are in the wrong place, they should be near the | 1161 | # These next 2 tests are in the wrong place, they should be near the |
4503 | 628 | 1162 | ||
4504 | === modified file 'lib/lp/buildmaster/tests/test_packagebuild.py' | |||
4505 | --- lib/lp/buildmaster/tests/test_packagebuild.py 2010-10-26 20:43:50 +0000 | |||
4506 | +++ lib/lp/buildmaster/tests/test_packagebuild.py 2010-12-07 16:24:04 +0000 | |||
4507 | @@ -97,8 +97,6 @@ | |||
4508 | 97 | self.assertRaises( | 97 | self.assertRaises( |
4509 | 98 | NotImplementedError, self.package_build.verifySuccessfulUpload) | 98 | NotImplementedError, self.package_build.verifySuccessfulUpload) |
4510 | 99 | self.assertRaises(NotImplementedError, self.package_build.notify) | 99 | self.assertRaises(NotImplementedError, self.package_build.notify) |
4511 | 100 | # XXX 2010-10-18 bug=662631 | ||
4512 | 101 | # Change this to do non-blocking IO. | ||
4513 | 102 | self.assertRaises( | 100 | self.assertRaises( |
4514 | 103 | NotImplementedError, self.package_build.handleStatus, | 101 | NotImplementedError, self.package_build.handleStatus, |
4515 | 104 | None, None, None) | 102 | None, None, None) |
4516 | @@ -311,8 +309,6 @@ | |||
4517 | 311 | # A filemap with plain filenames should not cause a problem. | 309 | # A filemap with plain filenames should not cause a problem. |
4518 | 312 | # The call to handleStatus will attempt to get the file from | 310 | # The call to handleStatus will attempt to get the file from |
4519 | 313 | # the slave resulting in a URL error in this test case. | 311 | # the slave resulting in a URL error in this test case. |
4520 | 314 | # XXX 2010-10-18 bug=662631 | ||
4521 | 315 | # Change this to do non-blocking IO. | ||
4522 | 316 | self.build.handleStatus('OK', None, { | 312 | self.build.handleStatus('OK', None, { |
4523 | 317 | 'filemap': {'myfile.py': 'test_file_hash'}, | 313 | 'filemap': {'myfile.py': 'test_file_hash'}, |
4524 | 318 | }) | 314 | }) |
4525 | @@ -323,8 +319,6 @@ | |||
4526 | 323 | def test_handleStatus_OK_absolute_filepath(self): | 319 | def test_handleStatus_OK_absolute_filepath(self): |
4527 | 324 | # A filemap that tries to write to files outside of | 320 | # A filemap that tries to write to files outside of |
4528 | 325 | # the upload directory will result in a failed upload. | 321 | # the upload directory will result in a failed upload. |
4529 | 326 | # XXX 2010-10-18 bug=662631 | ||
4530 | 327 | # Change this to do non-blocking IO. | ||
4531 | 328 | self.build.handleStatus('OK', None, { | 322 | self.build.handleStatus('OK', None, { |
4532 | 329 | 'filemap': {'/tmp/myfile.py': 'test_file_hash'}, | 323 | 'filemap': {'/tmp/myfile.py': 'test_file_hash'}, |
4533 | 330 | }) | 324 | }) |
4534 | @@ -335,8 +329,6 @@ | |||
4535 | 335 | def test_handleStatus_OK_relative_filepath(self): | 329 | def test_handleStatus_OK_relative_filepath(self): |
4536 | 336 | # A filemap that tries to write to files outside of | 330 | # A filemap that tries to write to files outside of |
4537 | 337 | # the upload directory will result in a failed upload. | 331 | # the upload directory will result in a failed upload. |
4538 | 338 | # XXX 2010-10-18 bug=662631 | ||
4539 | 339 | # Change this to do non-blocking IO. | ||
4540 | 340 | self.build.handleStatus('OK', None, { | 332 | self.build.handleStatus('OK', None, { |
4541 | 341 | 'filemap': {'../myfile.py': 'test_file_hash'}, | 333 | 'filemap': {'../myfile.py': 'test_file_hash'}, |
4542 | 342 | }) | 334 | }) |
4543 | @@ -347,8 +339,6 @@ | |||
4544 | 347 | # The build log is set during handleStatus. | 339 | # The build log is set during handleStatus. |
4545 | 348 | removeSecurityProxy(self.build).log = None | 340 | removeSecurityProxy(self.build).log = None |
4546 | 349 | self.assertEqual(None, self.build.log) | 341 | self.assertEqual(None, self.build.log) |
4547 | 350 | # XXX 2010-10-18 bug=662631 | ||
4548 | 351 | # Change this to do non-blocking IO. | ||
4549 | 352 | self.build.handleStatus('OK', None, { | 342 | self.build.handleStatus('OK', None, { |
4550 | 353 | 'filemap': {'myfile.py': 'test_file_hash'}, | 343 | 'filemap': {'myfile.py': 'test_file_hash'}, |
4551 | 354 | }) | 344 | }) |
4552 | @@ -358,8 +348,6 @@ | |||
4553 | 358 | # The date finished is updated during handleStatus_OK. | 348 | # The date finished is updated during handleStatus_OK. |
4554 | 359 | removeSecurityProxy(self.build).date_finished = None | 349 | removeSecurityProxy(self.build).date_finished = None |
4555 | 360 | self.assertEqual(None, self.build.date_finished) | 350 | self.assertEqual(None, self.build.date_finished) |
4556 | 361 | # XXX 2010-10-18 bug=662631 | ||
4557 | 362 | # Change this to do non-blocking IO. | ||
4558 | 363 | self.build.handleStatus('OK', None, { | 351 | self.build.handleStatus('OK', None, { |
4559 | 364 | 'filemap': {'myfile.py': 'test_file_hash'}, | 352 | 'filemap': {'myfile.py': 'test_file_hash'}, |
4560 | 365 | }) | 353 | }) |
4561 | 366 | 354 | ||
4562 | === modified file 'lib/lp/code/model/recipebuilder.py' | |||
4563 | --- lib/lp/code/model/recipebuilder.py 2010-09-24 12:47:12 +0000 | |||
4564 | +++ lib/lp/code/model/recipebuilder.py 2010-12-07 16:24:04 +0000 | |||
4565 | @@ -117,42 +117,38 @@ | |||
4566 | 117 | raise CannotBuild("Unable to find distroarchseries for %s in %s" % | 117 | raise CannotBuild("Unable to find distroarchseries for %s in %s" % |
4567 | 118 | (self._builder.processor.name, | 118 | (self._builder.processor.name, |
4568 | 119 | self.build.distroseries.displayname)) | 119 | self.build.distroseries.displayname)) |
4570 | 120 | args = self._extraBuildArgs(distroarchseries, logger) | 120 | |
4571 | 121 | chroot = distroarchseries.getChroot() | 121 | chroot = distroarchseries.getChroot() |
4572 | 122 | if chroot is None: | 122 | if chroot is None: |
4573 | 123 | raise CannotBuild("Unable to find a chroot for %s" % | 123 | raise CannotBuild("Unable to find a chroot for %s" % |
4574 | 124 | distroarchseries.displayname) | 124 | distroarchseries.displayname) |
4606 | 125 | d = self._builder.slave.cacheFile(logger, chroot) | 125 | self._builder.slave.cacheFile(logger, chroot) |
4607 | 126 | 126 | ||
4608 | 127 | def got_cache_file(ignored): | 127 | # Generate a string which can be used to cross-check when obtaining |
4609 | 128 | # Generate a string which can be used to cross-check when obtaining | 128 | # results so we know we are referring to the right database object in |
4610 | 129 | # results so we know we are referring to the right database object in | 129 | # subsequent runs. |
4611 | 130 | # subsequent runs. | 130 | buildid = "%s-%s" % (self.build.id, build_queue_id) |
4612 | 131 | buildid = "%s-%s" % (self.build.id, build_queue_id) | 131 | cookie = self.buildfarmjob.generateSlaveBuildCookie() |
4613 | 132 | cookie = self.buildfarmjob.generateSlaveBuildCookie() | 132 | chroot_sha1 = chroot.content.sha1 |
4614 | 133 | chroot_sha1 = chroot.content.sha1 | 133 | logger.debug( |
4615 | 134 | logger.debug( | 134 | "Initiating build %s on %s" % (buildid, self._builder.url)) |
4616 | 135 | "Initiating build %s on %s" % (buildid, self._builder.url)) | 135 | |
4617 | 136 | 136 | args = self._extraBuildArgs(distroarchseries, logger) | |
4618 | 137 | return self._builder.slave.build( | 137 | status, info = self._builder.slave.build( |
4619 | 138 | cookie, "sourcepackagerecipe", chroot_sha1, {}, args) | 138 | cookie, "sourcepackagerecipe", chroot_sha1, {}, args) |
4620 | 139 | 139 | message = """%s (%s): | |
4621 | 140 | def log_build_result((status, info)): | 140 | ***** RESULT ***** |
4622 | 141 | message = """%s (%s): | 141 | %s |
4623 | 142 | ***** RESULT ***** | 142 | %s: %s |
4624 | 143 | %s | 143 | ****************** |
4625 | 144 | %s: %s | 144 | """ % ( |
4626 | 145 | ****************** | 145 | self._builder.name, |
4627 | 146 | """ % ( | 146 | self._builder.url, |
4628 | 147 | self._builder.name, | 147 | args, |
4629 | 148 | self._builder.url, | 148 | status, |
4630 | 149 | args, | 149 | info, |
4631 | 150 | status, | 150 | ) |
4632 | 151 | info, | 151 | logger.info(message) |
4602 | 152 | ) | ||
4603 | 153 | logger.info(message) | ||
4604 | 154 | |||
4605 | 155 | return d.addCallback(got_cache_file).addCallback(log_build_result) | ||
4633 | 156 | 152 | ||
4634 | 157 | def verifyBuildRequest(self, logger): | 153 | def verifyBuildRequest(self, logger): |
4635 | 158 | """Assert some pre-build checks. | 154 | """Assert some pre-build checks. |
4636 | 159 | 155 | ||
4637 | === modified file 'lib/lp/soyuz/browser/tests/test_builder_views.py' | |||
4638 | --- lib/lp/soyuz/browser/tests/test_builder_views.py 2010-10-06 12:20:03 +0000 | |||
4639 | +++ lib/lp/soyuz/browser/tests/test_builder_views.py 2010-12-07 16:24:04 +0000 | |||
4640 | @@ -34,7 +34,7 @@ | |||
4641 | 34 | return view | 34 | return view |
4642 | 35 | 35 | ||
4643 | 36 | def test_posting_form_doesnt_call_slave_xmlrpc(self): | 36 | def test_posting_form_doesnt_call_slave_xmlrpc(self): |
4645 | 37 | # Posting the +edit for should not call isAvailable, which | 37 | # Posting the +edit for should not call is_available, which |
4646 | 38 | # would do xmlrpc to a slave builder and is explicitly forbidden | 38 | # would do xmlrpc to a slave builder and is explicitly forbidden |
4647 | 39 | # in a webapp process. | 39 | # in a webapp process. |
4648 | 40 | view = self.initialize_view() | 40 | view = self.initialize_view() |
4649 | 41 | 41 | ||
4650 | === added file 'lib/lp/soyuz/doc/buildd-dispatching.txt' | |||
4651 | --- lib/lp/soyuz/doc/buildd-dispatching.txt 1970-01-01 00:00:00 +0000 | |||
4652 | +++ lib/lp/soyuz/doc/buildd-dispatching.txt 2010-12-07 16:24:04 +0000 | |||
4653 | @@ -0,0 +1,371 @@ | |||
4654 | 1 | = Buildd Dispatching = | ||
4655 | 2 | |||
4656 | 3 | >>> import transaction | ||
4657 | 4 | >>> import logging | ||
4658 | 5 | >>> logger = logging.getLogger() | ||
4659 | 6 | >>> logger.setLevel(logging.DEBUG) | ||
4660 | 7 | |||
4661 | 8 | The buildd dispatching basically consists of finding a available | ||
4662 | 9 | slave in IDLE state, pushing any required files to it, then requesting | ||
4663 | 10 | that it starts the build procedure. These tasks are implemented by the | ||
4664 | 11 | BuilderSet and Builder classes. | ||
4665 | 12 | |||
4666 | 13 | Setup the test builder: | ||
4667 | 14 | |||
4668 | 15 | >>> from canonical.buildd.tests import BuilddSlaveTestSetup | ||
4669 | 16 | >>> fixture = BuilddSlaveTestSetup() | ||
4670 | 17 | >>> fixture.setUp() | ||
4671 | 18 | |||
4672 | 19 | Setup a suitable chroot for Hoary i386: | ||
4673 | 20 | |||
4674 | 21 | >>> from StringIO import StringIO | ||
4675 | 22 | >>> from canonical.librarian.interfaces import ILibrarianClient | ||
4676 | 23 | >>> librarian_client = getUtility(ILibrarianClient) | ||
4677 | 24 | |||
4678 | 25 | >>> content = 'anything' | ||
4679 | 26 | >>> alias_id = librarian_client.addFile( | ||
4680 | 27 | ... 'foo.tar.gz', len(content), StringIO(content), 'text/plain') | ||
4681 | 28 | |||
4682 | 29 | >>> from canonical.launchpad.interfaces.librarian import ILibraryFileAliasSet | ||
4683 | 30 | >>> from lp.registry.interfaces.distribution import IDistributionSet | ||
4684 | 31 | >>> from lp.registry.interfaces.pocket import PackagePublishingPocket | ||
4685 | 32 | |||
4686 | 33 | >>> hoary = getUtility(IDistributionSet)['ubuntu']['hoary'] | ||
4687 | 34 | >>> hoary_i386 = hoary['i386'] | ||
4688 | 35 | |||
4689 | 36 | >>> chroot = getUtility(ILibraryFileAliasSet)[alias_id] | ||
4690 | 37 | >>> pc = hoary_i386.addOrUpdateChroot(chroot=chroot) | ||
4691 | 38 | |||
4692 | 39 | Activate builders present in sampledata, we need to be logged in as a | ||
4693 | 40 | member of launchpad-buildd-admin: | ||
4694 | 41 | |||
4695 | 42 | >>> from canonical.launchpad.ftests import login | ||
4696 | 43 | >>> login('celso.providelo@canonical.com') | ||
4697 | 44 | |||
4698 | 45 | Set IBuilder.builderok of all present builders: | ||
4699 | 46 | |||
4700 | 47 | >>> from lp.buildmaster.interfaces.builder import IBuilderSet | ||
4701 | 48 | >>> builder_set = getUtility(IBuilderSet) | ||
4702 | 49 | |||
4703 | 50 | >>> builder_set.count() | ||
4704 | 51 | 2 | ||
4705 | 52 | |||
4706 | 53 | >>> from canonical.launchpad.ftests import syncUpdate | ||
4707 | 54 | >>> for b in builder_set: | ||
4708 | 55 | ... b.builderok = True | ||
4709 | 56 | ... syncUpdate(b) | ||
4710 | 57 | |||
4711 | 58 | Clean up previous BuildQueue results from sampledata: | ||
4712 | 59 | |||
4713 | 60 | >>> from lp.buildmaster.interfaces.buildqueue import IBuildQueueSet | ||
4714 | 61 | >>> lost_job = getUtility(IBuildQueueSet).get(1) | ||
4715 | 62 | >>> lost_job.builder.name | ||
4716 | 63 | u'bob' | ||
4717 | 64 | >>> lost_job.destroySelf() | ||
4718 | 65 | >>> transaction.commit() | ||
4719 | 66 | |||
4720 | 67 | If the specified buildd slave reset command (used inside resumeSlaveHost()) | ||
4721 | 68 | fails, the slave will still be marked as failed. | ||
4722 | 69 | |||
4723 | 70 | >>> from canonical.config import config | ||
4724 | 71 | >>> reset_fail_config = ''' | ||
4725 | 72 | ... [builddmaster] | ||
4726 | 73 | ... vm_resume_command: /bin/false''' | ||
4727 | 74 | >>> config.push('reset fail', reset_fail_config) | ||
4728 | 75 | >>> frog_builder = builder_set['frog'] | ||
4729 | 76 | >>> frog_builder.handleTimeout(logger, 'The universe just collapsed') | ||
4730 | 77 | WARNING:root:Resetting builder: http://localhost:9221/ -- The universe just collapsed | ||
4731 | 78 | ... | ||
4732 | 79 | WARNING:root:Failed to reset builder: http://localhost:9221/ -- Resuming failed: | ||
4733 | 80 | ... | ||
4734 | 81 | WARNING:root:Disabling builder: http://localhost:9221/ -- The universe just collapsed | ||
4735 | 82 | ... | ||
4736 | 83 | <BLANKLINE> | ||
4737 | 84 | |||
4738 | 85 | Since we were unable to reset the 'frog' builder, it was marked as 'failed'. | ||
4739 | 86 | |||
4740 | 87 | >>> frog_builder.builderok | ||
4741 | 88 | False | ||
4742 | 89 | |||
4743 | 90 | Restore default value for resume command. | ||
4744 | 91 | |||
4745 | 92 | >>> ignored_config = config.pop('reset fail') | ||
4746 | 93 | |||
4747 | 94 | The 'bob' builder is available for build jobs. | ||
4748 | 95 | |||
4749 | 96 | >>> bob_builder = builder_set['bob'] | ||
4750 | 97 | >>> bob_builder.name | ||
4751 | 98 | u'bob' | ||
4752 | 99 | >>> bob_builder.virtualized | ||
4753 | 100 | False | ||
4754 | 101 | >>> bob_builder.is_available | ||
4755 | 102 | True | ||
4756 | 103 | >>> bob_builder.builderok | ||
4757 | 104 | True | ||
4758 | 105 | |||
4759 | 106 | |||
4760 | 107 | == Builder dispatching API == | ||
4761 | 108 | |||
4762 | 109 | Now let's check the build candidates which will be considered for the | ||
4763 | 110 | builder 'bob': | ||
4764 | 111 | |||
4765 | 112 | >>> from zope.security.proxy import removeSecurityProxy | ||
4766 | 113 | >>> job = removeSecurityProxy(bob_builder)._findBuildCandidate() | ||
4767 | 114 | |||
4768 | 115 | The single BuildQueue found is a non-virtual pending build: | ||
4769 | 116 | |||
4770 | 117 | >>> job.id | ||
4771 | 118 | 2 | ||
4772 | 119 | >>> from lp.soyuz.interfaces.binarypackagebuild import ( | ||
4773 | 120 | ... IBinaryPackageBuildSet) | ||
4774 | 121 | >>> build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(job) | ||
4775 | 122 | >>> build.status.name | ||
4776 | 123 | 'NEEDSBUILD' | ||
4777 | 124 | >>> job.builder is None | ||
4778 | 125 | True | ||
4779 | 126 | >>> job.date_started is None | ||
4780 | 127 | True | ||
4781 | 128 | >>> build.is_virtualized | ||
4782 | 129 | False | ||
4783 | 130 | |||
4784 | 131 | The build start time is not set yet either. | ||
4785 | 132 | |||
4786 | 133 | >>> print build.date_first_dispatched | ||
4787 | 134 | None | ||
4788 | 135 | |||
4789 | 136 | Update the SourcePackageReleaseFile corresponding to this job: | ||
4790 | 137 | |||
4791 | 138 | >>> content = 'anything' | ||
4792 | 139 | >>> alias_id = librarian_client.addFile( | ||
4793 | 140 | ... 'foo.dsc', len(content), StringIO(content), 'application/dsc') | ||
4794 | 141 | |||
4795 | 142 | >>> sprf = build.source_package_release.files[0] | ||
4796 | 143 | >>> naked_sprf = removeSecurityProxy(sprf) | ||
4797 | 144 | >>> naked_sprf.libraryfile = getUtility(ILibraryFileAliasSet)[alias_id] | ||
4798 | 145 | >>> flush_database_updates() | ||
4799 | 146 | |||
4800 | 147 | Check the dispatching method itself: | ||
4801 | 148 | |||
4802 | 149 | >>> dispatched_job = bob_builder.findAndStartJob() | ||
4803 | 150 | >>> job == dispatched_job | ||
4804 | 151 | True | ||
4805 | 152 | >>> bob_builder.builderok = True | ||
4806 | 153 | |||
4807 | 154 | >>> flush_database_updates() | ||
4808 | 155 | |||
4809 | 156 | Verify if the job (BuildQueue) was updated appropriately: | ||
4810 | 157 | |||
4811 | 158 | >>> job.builder.id == bob_builder.id | ||
4812 | 159 | True | ||
4813 | 160 | |||
4814 | 161 | >>> dispatched_build = getUtility( | ||
4815 | 162 | ... IBinaryPackageBuildSet).getByQueueEntry(job) | ||
4816 | 163 | >>> dispatched_build == build | ||
4817 | 164 | True | ||
4818 | 165 | |||
4819 | 166 | >>> build.status.name | ||
4820 | 167 | 'BUILDING' | ||
4821 | 168 | |||
4822 | 169 | Shut down the builder, mark the build record as failed and remove the | ||
4823 | 170 | buildqueue record, so the build is eliminated: | ||
4824 | 171 | |||
4825 | 172 | >>> fixture.tearDown() | ||
4826 | 173 | |||
4827 | 174 | >>> from lp.buildmaster.enums import BuildStatus | ||
4828 | 175 | >>> build.status = BuildStatus.FAILEDTOBUILD | ||
4829 | 176 | >>> job.destroySelf() | ||
4830 | 177 | >>> flush_database_updates() | ||
4831 | 178 | |||
4832 | 179 | |||
4833 | 180 | == PPA build dispatching == | ||
4834 | 181 | |||
4835 | 182 | Create a new Build record of the same source targeted for a PPA archive: | ||
4836 | 183 | |||
4837 | 184 | >>> from lp.registry.interfaces.person import IPersonSet | ||
4838 | 185 | >>> cprov = getUtility(IPersonSet).getByName('cprov') | ||
4839 | 186 | |||
4840 | 187 | >>> ppa_build = sprf.sourcepackagerelease.createBuild( | ||
4841 | 188 | ... hoary_i386, PackagePublishingPocket.RELEASE, cprov.archive) | ||
4842 | 189 | |||
4843 | 190 | Create BuildQueue record and inspect some parameters: | ||
4844 | 191 | |||
4845 | 192 | >>> ppa_job = ppa_build.queueBuild() | ||
4846 | 193 | >>> ppa_job.id | ||
4847 | 194 | 3 | ||
4848 | 195 | >>> ppa_job.builder == None | ||
4849 | 196 | True | ||
4850 | 197 | >>> ppa_job.date_started == None | ||
4851 | 198 | True | ||
4852 | 199 | |||
4853 | 200 | The build job's archive requires virtualized builds. | ||
4854 | 201 | |||
4855 | 202 | >>> build = getUtility(IBinaryPackageBuildSet).getByQueueEntry(ppa_job) | ||
4856 | 203 | >>> build.archive.require_virtualized | ||
4857 | 204 | True | ||
4858 | 205 | |||
4859 | 206 | But the builder is not virtualized. | ||
4860 | 207 | |||
4861 | 208 | >>> bob_builder.virtualized | ||
4862 | 209 | False | ||
4863 | 210 | |||
4864 | 211 | Hence, the builder will not be able to pick up the PPA build job created | ||
4865 | 212 | above. | ||
4866 | 213 | |||
4867 | 214 | >>> bob_builder.vm_host = 'localhost.ppa' | ||
4868 | 215 | >>> syncUpdate(bob_builder) | ||
4869 | 216 | |||
4870 | 217 | >>> job = removeSecurityProxy(bob_builder)._findBuildCandidate() | ||
4871 | 218 | >>> print job | ||
4872 | 219 | None | ||
4873 | 220 | |||
4874 | 221 | In order to enable 'bob' to find and build the PPA job, we have to | ||
4875 | 222 | change it to virtualized. This is because PPA builds will only build | ||
4876 | 223 | on virtualized builders. We also need to make sure this build's source | ||
4877 | 224 | is published, or it will also be ignored (by superseding it). We can | ||
4878 | 225 | do this by copying the existing publication in Ubuntu. | ||
4879 | 226 | |||
4880 | 227 | >>> from lp.soyuz.model.publishing import ( | ||
4881 | 228 | ... SourcePackagePublishingHistory) | ||
4882 | 229 | >>> [old_pub] = SourcePackagePublishingHistory.selectBy( | ||
4883 | 230 | ... distroseries=build.distro_series, | ||
4884 | 231 | ... sourcepackagerelease=build.source_package_release) | ||
4885 | 232 | >>> new_pub = old_pub.copyTo( | ||
4886 | 233 | ... old_pub.distroseries, old_pub.pocket, build.archive) | ||
4887 | 234 | |||
4888 | 235 | >>> bob_builder.virtualized = True | ||
4889 | 236 | >>> syncUpdate(bob_builder) | ||
4890 | 237 | |||
4891 | 238 | >>> job = removeSecurityProxy(bob_builder)._findBuildCandidate() | ||
4892 | 239 | >>> ppa_job.id == job.id | ||
4893 | 240 | True | ||
4894 | 241 | |||
4895 | 242 | For further details regarding IBuilder._findBuildCandidate() please see | ||
4896 | 243 | lib/lp/soyuz/tests/test_builder.py. | ||
4897 | 244 | |||
4898 | 245 | Start buildd-slave to be able to dispatch jobs. | ||
4899 | 246 | |||
4900 | 247 | >>> fixture = BuilddSlaveTestSetup() | ||
4901 | 248 | >>> fixture.setUp() | ||
4902 | 249 | |||
4903 | 250 | Before dispatching we can check if the builder is protected against | ||
4904 | 251 | mistakes in code that result in an attempt to build a virtual job on | ||
4905 | 252 | a non-virtual builder. | ||
4906 | 253 | |||
4907 | 254 | >>> bob_builder.virtualized = False | ||
4908 | 255 | >>> flush_database_updates() | ||
4909 | 256 | >>> removeSecurityProxy(bob_builder)._dispatchBuildCandidate(ppa_job) | ||
4910 | 257 | Traceback (most recent call last): | ||
4911 | 258 | ... | ||
4912 | 259 | AssertionError: Attempt to build non-virtual item on a virtual builder. | ||
4913 | 260 | |||
4914 | 261 | Mark the builder as virtual again, so we can dispatch the ppa job | ||
4915 | 262 | successfully. | ||
4916 | 263 | |||
4917 | 264 | >>> bob_builder.virtualized = True | ||
4918 | 265 | >>> flush_database_updates() | ||
4919 | 266 | |||
4920 | 267 | >>> dispatched_job = bob_builder.findAndStartJob() | ||
4921 | 268 | >>> ppa_job == dispatched_job | ||
4922 | 269 | True | ||
4923 | 270 | |||
4924 | 271 | >>> flush_database_updates() | ||
4925 | 272 | |||
4926 | 273 | PPA job is building. | ||
4927 | 274 | |||
4928 | 275 | >>> ppa_job.builder.name | ||
4929 | 276 | u'bob' | ||
4930 | 277 | |||
4931 | 278 | >>> build.status.name | ||
4932 | 279 | 'BUILDING' | ||
4933 | 280 | |||
4934 | 281 | Shutdown builder slave, mark the ppa build record as failed, remove the | ||
4935 | 282 | buildqueue record and make 'bob' builder non-virtual again, so the | ||
4936 | 283 | environment is back to the initial state. | ||
4937 | 284 | |||
4938 | 285 | >>> fixture.tearDown() | ||
4939 | 286 | |||
4940 | 287 | >>> build.status = BuildStatus.FAILEDTOBUILD | ||
4941 | 288 | >>> ppa_job.destroySelf() | ||
4942 | 289 | >>> bob_builder.virtualized = False | ||
4943 | 290 | >>> flush_database_updates() | ||
4944 | 291 | |||
4945 | 292 | |||
4946 | 293 | == Security build dispatching == | ||
4947 | 294 | |||
4948 | 295 | Setup chroot for warty/i386. | ||
4949 | 296 | |||
4950 | 297 | >>> warty = getUtility(IDistributionSet)['ubuntu']['warty'] | ||
4951 | 298 | >>> warty_i386 = warty['i386'] | ||
4952 | 299 | >>> pc = warty_i386.addOrUpdateChroot(chroot=chroot) | ||
4953 | 300 | |||
4954 | 301 | Create a new Build record for test source targeted to warty/i386 | ||
4955 | 302 | architecture and SECURITY pocket: | ||
4956 | 303 | |||
4957 | 304 | >>> sec_build = sprf.sourcepackagerelease.createBuild( | ||
4958 | 305 | ... warty_i386, PackagePublishingPocket.SECURITY, hoary.main_archive) | ||
4959 | 306 | |||
4960 | 307 | Create BuildQueue record and inspect some parameters: | ||
4961 | 308 | |||
4962 | 309 | >>> sec_job = sec_build.queueBuild() | ||
4963 | 310 | >>> sec_job.id | ||
4964 | 311 | 4 | ||
4965 | 312 | >>> print sec_job.builder | ||
4966 | 313 | None | ||
4967 | 314 | >>> print sec_job.date_started | ||
4968 | 315 | None | ||
4969 | 316 | >>> sec_build.is_virtualized | ||
4970 | 317 | False | ||
4971 | 318 | |||
4972 | 319 | In normal conditions the next available candidate would be the job | ||
4973 | 320 | targeted to SECURITY pocket. However, the builders are forbidden to | ||
4974 | 321 | accept such jobs until we have finished the EMBARGOED archive | ||
4975 | 322 | implementation. | ||
4976 | 323 | |||
4977 | 324 | >>> fixture = BuilddSlaveTestSetup() | ||
4978 | 325 | >>> fixture.setUp() | ||
4979 | 326 | >>> removeSecurityProxy(bob_builder)._dispatchBuildCandidate(sec_job) | ||
4980 | 327 | Traceback (most recent call last): | ||
4981 | 328 | ... | ||
4982 | 329 | AssertionError: Soyuz is not yet capable of building SECURITY uploads. | ||
4983 | 330 | >>> fixture.tearDown() | ||
4984 | 331 | |||
4985 | 332 | To solve this problem temporarily until we start building security | ||
4986 | 333 | uploads, we will mark builds targeted to the SECURITY pocket as | ||
4987 | 334 | FAILEDTOBUILD during the _findBuildCandidate look-up. | ||
4988 | 335 | |||
4989 | 336 | We will also create another build candidate in breezy-autotest/i386 to | ||
4990 | 337 | check if legitimate pending candidates will remain valid. | ||
4991 | 338 | |||
4992 | 339 | >>> breezy = getUtility(IDistributionSet)['ubuntu']['breezy-autotest'] | ||
4993 | 340 | >>> breezy_i386 = breezy['i386'] | ||
4994 | 341 | >>> pc = breezy_i386.addOrUpdateChroot(chroot=chroot) | ||
4995 | 342 | |||
4996 | 343 | >>> pending_build = sprf.sourcepackagerelease.createBuild( | ||
4997 | 344 | ... breezy_i386, PackagePublishingPocket.UPDATES, hoary.main_archive) | ||
4998 | 345 | >>> pending_job = pending_build.queueBuild() | ||
4999 | 346 | |||
5000 | 347 | We set the score of the security job to ensure it is considered |
The diff has been truncated for viewing.
There are additional revisions which have not been approved in review. Please seek review and approval of these new revisions.