Merge lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead into lp:launchpad
- no-hosted-area-include-launchpad-loggerhead
- Merge into devel
Proposed by
Michael Hudson-Doyle
Status: | Merged |
---|---|
Approved by: | Tim Penhey |
Approved revision: | no longer in the source branch. |
Merged at revision: | 10828 |
Proposed branch: | lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead |
Merge into: | lp:launchpad |
Prerequisite: | lp:~mwhudson/launchpad/no-hosted-area-server-catchup |
Diff against target: |
709 lines (+641/-4) 9 files modified
Makefile (+3/-3) lib/launchpad_loggerhead/__init__.py (+1/-0) lib/launchpad_loggerhead/app.py (+232/-0) lib/launchpad_loggerhead/debug.py (+120/-0) lib/launchpad_loggerhead/session.py (+73/-0) lib/launchpad_loggerhead/static/robots.txt (+2/-0) scripts/start-loggerhead.py (+177/-0) scripts/stop-loggerhead.py (+33/-0) utilities/sourcedeps.conf (+0/-1) |
To merge this branch: | bzr merge lp:~mwhudson/launchpad/no-hosted-area-include-launchpad-loggerhead |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Tim Penhey (community) | Approve | ||
Review via email: mp+24193@code.launchpad.net |
Commit message
Description of the change
Hi Tim,
This branch replaces https:/
Cheers,
mwh
To post a comment you must log in.
Revision history for this message
Tim Penhey (thumper) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'Makefile' | |||
2 | --- Makefile 2010-04-27 03:45:51 +0000 | |||
3 | +++ Makefile 2010-04-27 03:46:17 +0000 | |||
4 | @@ -229,13 +229,13 @@ | |||
5 | 229 | -i $(LPCONFIG) | 229 | -i $(LPCONFIG) |
6 | 230 | 230 | ||
7 | 231 | run_codebrowse: build | 231 | run_codebrowse: build |
9 | 232 | BZR_PLUGIN_PATH=bzrplugins $(PY) sourcecode/launchpad-loggerhead/start-loggerhead.py -f | 232 | BZR_PLUGIN_PATH=bzrplugins $(PY) scripts/start-loggerhead.py -f |
10 | 233 | 233 | ||
11 | 234 | start_codebrowse: build | 234 | start_codebrowse: build |
13 | 235 | BZR_PLUGIN_PATH=$(shell pwd)/bzrplugins $(PY) sourcecode/launchpad-loggerhead/start-loggerhead.py | 235 | BZR_PLUGIN_PATH=$(shell pwd)/bzrplugins $(PY) scripts/start-loggerhead.py |
14 | 236 | 236 | ||
15 | 237 | stop_codebrowse: | 237 | stop_codebrowse: |
17 | 238 | $(PY) sourcecode/launchpad-loggerhead/stop-loggerhead.py | 238 | $(PY) scripts/stop-loggerhead.py |
18 | 239 | 239 | ||
19 | 240 | run_codehosting: check_schema inplace stop hosted_branches | 240 | run_codehosting: check_schema inplace stop hosted_branches |
20 | 241 | $(RM) thread*.request | 241 | $(RM) thread*.request |
21 | 242 | 242 | ||
22 | === added directory 'lib/launchpad_loggerhead' | |||
23 | === removed symlink 'lib/launchpad_loggerhead' | |||
24 | === target was u'../sourcecode/launchpad-loggerhead/launchpad_loggerhead/' | |||
25 | === added file 'lib/launchpad_loggerhead/__init__.py' | |||
26 | --- lib/launchpad_loggerhead/__init__.py 1970-01-01 00:00:00 +0000 | |||
27 | +++ lib/launchpad_loggerhead/__init__.py 2010-04-27 03:46:17 +0000 | |||
28 | @@ -0,0 +1,1 @@ | |||
29 | 1 | |||
30 | 0 | 2 | ||
31 | === added file 'lib/launchpad_loggerhead/app.py' | |||
32 | --- lib/launchpad_loggerhead/app.py 1970-01-01 00:00:00 +0000 | |||
33 | +++ lib/launchpad_loggerhead/app.py 2010-04-27 03:46:17 +0000 | |||
34 | @@ -0,0 +1,232 @@ | |||
35 | 1 | # Copyright 2009 Canonical Ltd. This software is licensed under the | ||
36 | 2 | # GNU Affero General Public License version 3 (see the file LICENSE). | ||
37 | 3 | |||
38 | 4 | import logging | ||
39 | 5 | import re | ||
40 | 6 | import os | ||
41 | 7 | import threading | ||
42 | 8 | import urllib | ||
43 | 9 | import urlparse | ||
44 | 10 | import xmlrpclib | ||
45 | 11 | |||
46 | 12 | from bzrlib import branch, errors, lru_cache, urlutils | ||
47 | 13 | |||
48 | 14 | from loggerhead.apps import favicon_app, static_app | ||
49 | 15 | from loggerhead.apps.branch import BranchWSGIApp | ||
50 | 16 | |||
51 | 17 | from openid.extensions.sreg import SRegRequest, SRegResponse | ||
52 | 18 | from openid.consumer.consumer import CANCEL, Consumer, FAILURE, SUCCESS | ||
53 | 19 | from openid.store.memstore import MemoryStore | ||
54 | 20 | |||
55 | 21 | from paste.fileapp import DataApp | ||
56 | 22 | from paste.request import construct_url, parse_querystring, path_info_pop | ||
57 | 23 | from paste.httpexceptions import ( | ||
58 | 24 | HTTPMovedPermanently, HTTPNotFound, HTTPUnauthorized) | ||
59 | 25 | |||
60 | 26 | from canonical.config import config | ||
61 | 27 | from canonical.launchpad.xmlrpc import faults | ||
62 | 28 | from lp.code.interfaces.codehosting import ( | ||
63 | 29 | BRANCH_TRANSPORT, LAUNCHPAD_ANONYMOUS, LAUNCHPAD_SERVICES) | ||
64 | 30 | from lp.codehosting.vfs import branch_id_to_path | ||
65 | 31 | |||
66 | 32 | robots_txt = '''\ | ||
67 | 33 | User-agent: * | ||
68 | 34 | Disallow: / | ||
69 | 35 | ''' | ||
70 | 36 | |||
71 | 37 | robots_app = DataApp(robots_txt, content_type='text/plain') | ||
72 | 38 | |||
73 | 39 | |||
74 | 40 | thread_transports = threading.local() | ||
75 | 41 | |||
76 | 42 | def valid_launchpad_name(s): | ||
77 | 43 | return re.match('^[a-z0-9][a-z0-9\+\.\-]*$', s) is not None | ||
78 | 44 | |||
79 | 45 | |||
80 | 46 | def valid_launchpad_user_name(s): | ||
81 | 47 | return re.match('^~[a-z0-9][a-z0-9\+\.\-]*$', s) is not None | ||
82 | 48 | |||
83 | 49 | |||
84 | 50 | def valid_launchpad_branch_name(s): | ||
85 | 51 | return re.match(r'^(?i)[a-z0-9][a-z0-9+\.\-@_]*\Z', s) is not None | ||
86 | 52 | |||
87 | 53 | |||
88 | 54 | class RootApp: | ||
89 | 55 | |||
90 | 56 | def __init__(self, session_var): | ||
91 | 57 | self.graph_cache = lru_cache.LRUCache(10) | ||
92 | 58 | self.branchfs = xmlrpclib.ServerProxy( | ||
93 | 59 | config.codehosting.branchfs_endpoint) | ||
94 | 60 | self.session_var = session_var | ||
95 | 61 | self.store = MemoryStore() | ||
96 | 62 | self.log = logging.getLogger('lp-loggerhead') | ||
97 | 63 | branch.Branch.hooks.install_named_hook( | ||
98 | 64 | 'transform_fallback_location', | ||
99 | 65 | self._transform_fallback_location_hook, | ||
100 | 66 | 'RootApp._transform_fallback_location_hook') | ||
101 | 67 | |||
102 | 68 | def _transform_fallback_location_hook(self, branch, url): | ||
103 | 69 | """Transform a human-readable fallback URL into an id-based one. | ||
104 | 70 | |||
105 | 71 | Branches on Launchpad record their stacked-on URLs in the form | ||
106 | 72 | '/~user/product/branch', but we need to access branches based on | ||
107 | 73 | database ID to gain access to private branches. So we use this hook | ||
108 | 74 | into Bazaar's branch-opening process to translate the former to the | ||
109 | 75 | latter. | ||
110 | 76 | """ | ||
111 | 77 | # It might seem that using the LAUNCHPAD_SERVICES 'user', which allows | ||
112 | 78 | # access to all branches, here would be a security risk. But in fact | ||
113 | 79 | # it isn't, because a user will only have launchpad.View on the | ||
114 | 80 | # stacked branch if they have it for all the stacked-on branches. | ||
115 | 81 | # (It would be nice to use the user from the request, but that's far | ||
116 | 82 | # from simple because branch hooks are global per-process and we | ||
117 | 83 | # handle different requests in different threads). | ||
118 | 84 | transport_type, info, trail = self.branchfs.translatePath( | ||
119 | 85 | LAUNCHPAD_SERVICES, url) | ||
120 | 86 | return urlparse.urljoin( | ||
121 | 87 | config.codehosting.internal_branch_by_id_root, | ||
122 | 88 | branch_id_to_path(info['id'])) | ||
123 | 89 | |||
124 | 90 | def get_transports(self): | ||
125 | 91 | t = getattr(thread_transports, 'transports', None) | ||
126 | 92 | if t is None: | ||
127 | 93 | thread_transports.transports = [] | ||
128 | 94 | return thread_transports.transports | ||
129 | 95 | |||
130 | 96 | def _make_consumer(self, environ): | ||
131 | 97 | """Build an OpenID `Consumer` object with standard arguments.""" | ||
132 | 98 | return Consumer(environ[self.session_var], self.store) | ||
133 | 99 | |||
134 | 100 | def _begin_login(self, environ, start_response): | ||
135 | 101 | """Start the process of authenticating with OpenID. | ||
136 | 102 | |||
137 | 103 | We redirect the user to Launchpad to identify themselves, asking to be | ||
138 | 104 | sent their nickname. Launchpad will then redirect them to our +login | ||
139 | 105 | page with enough information that we can then redirect them again to | ||
140 | 106 | the page they were looking at, with a cookie that gives us the | ||
141 | 107 | username. | ||
142 | 108 | """ | ||
143 | 109 | openid_request = self._make_consumer(environ).begin( | ||
144 | 110 | 'https://' + config.vhost.openid.hostname) | ||
145 | 111 | openid_request.addExtension( | ||
146 | 112 | SRegRequest(required=['nickname'])) | ||
147 | 113 | back_to = construct_url(environ) | ||
148 | 114 | raise HTTPMovedPermanently(openid_request.redirectURL( | ||
149 | 115 | config.codehosting.secure_codebrowse_root, | ||
150 | 116 | config.codehosting.secure_codebrowse_root + '+login/?' | ||
151 | 117 | + urllib.urlencode({'back_to':back_to}))) | ||
152 | 118 | |||
153 | 119 | def _complete_login(self, environ, start_response): | ||
154 | 120 | """Complete the OpenID authentication process. | ||
155 | 121 | |||
156 | 122 | Here we handle the result of the OpenID process. If the process | ||
157 | 123 | succeeded, we record the username in the session and redirect the user | ||
158 | 124 | to the page they were trying to view that triggered the login attempt. | ||
159 | 125 | In the various failure cases we return a 401 Unauthorized response | ||
160 | 126 | with a brief explanation of what went wrong. | ||
161 | 127 | """ | ||
162 | 128 | query = dict(parse_querystring(environ)) | ||
163 | 129 | # Passing query['openid.return_to'] here is massive cheating, but | ||
164 | 130 | # given we control the endpoint who cares. | ||
165 | 131 | response = self._make_consumer(environ).complete( | ||
166 | 132 | query, query['openid.return_to']) | ||
167 | 133 | if response.status == SUCCESS: | ||
168 | 134 | self.log.error('open id response: SUCCESS') | ||
169 | 135 | sreg_info = SRegResponse.fromSuccessResponse(response) | ||
170 | 136 | environ[self.session_var]['user'] = sreg_info['nickname'] | ||
171 | 137 | raise HTTPMovedPermanently(query['back_to']) | ||
172 | 138 | elif response.status == FAILURE: | ||
173 | 139 | self.log.error('open id response: FAILURE: %s', response.message) | ||
174 | 140 | exc = HTTPUnauthorized() | ||
175 | 141 | exc.explanation = response.message | ||
176 | 142 | raise exc | ||
177 | 143 | elif response.status == CANCEL: | ||
178 | 144 | self.log.error('open id response: CANCEL') | ||
179 | 145 | exc = HTTPUnauthorized() | ||
180 | 146 | exc.explanation = "Authetication cancelled." | ||
181 | 147 | raise exc | ||
182 | 148 | else: | ||
183 | 149 | self.log.error('open id response: UNKNOWN') | ||
184 | 150 | exc = HTTPUnauthorized() | ||
185 | 151 | exc.explanation = "Unknown OpenID response." | ||
186 | 152 | raise exc | ||
187 | 153 | |||
188 | 154 | def __call__(self, environ, start_response): | ||
189 | 155 | environ['loggerhead.static.url'] = environ['SCRIPT_NAME'] | ||
190 | 156 | if environ['PATH_INFO'].startswith('/static/'): | ||
191 | 157 | path_info_pop(environ) | ||
192 | 158 | return static_app(environ, start_response) | ||
193 | 159 | elif environ['PATH_INFO'] == '/favicon.ico': | ||
194 | 160 | return favicon_app(environ, start_response) | ||
195 | 161 | elif environ['PATH_INFO'] == '/robots.txt': | ||
196 | 162 | return robots_app(environ, start_response) | ||
197 | 163 | elif environ['PATH_INFO'].startswith('/+login'): | ||
198 | 164 | return self._complete_login(environ, start_response) | ||
199 | 165 | path = environ['PATH_INFO'] | ||
200 | 166 | trailingSlashCount = len(path) - len(path.rstrip('/')) | ||
201 | 167 | user = environ[self.session_var].get('user', LAUNCHPAD_ANONYMOUS) | ||
202 | 168 | try: | ||
203 | 169 | transport_type, info, trail = self.branchfs.translatePath( | ||
204 | 170 | user, urlutils.escape(path)) | ||
205 | 171 | except xmlrpclib.Fault, f: | ||
206 | 172 | if faults.check_fault(f, faults.PathTranslationError): | ||
207 | 173 | raise HTTPNotFound() | ||
208 | 174 | elif faults.check_fault(f, faults.PermissionDenied): | ||
209 | 175 | # If we're not allowed to see the branch... | ||
210 | 176 | if environ['wsgi.url_scheme'] != 'https': | ||
211 | 177 | # ... the request shouldn't have come in over http, as | ||
212 | 178 | # requests for private branches over http should be | ||
213 | 179 | # redirected to https by the dynamic rewrite script we use | ||
214 | 180 | # (which runs before this code is reached), but just in | ||
215 | 181 | # case... | ||
216 | 182 | env_copy = environ.copy() | ||
217 | 183 | env_copy['wsgi.url_scheme'] = 'https' | ||
218 | 184 | raise HTTPMovedPermanently(construct_url(env_copy)) | ||
219 | 185 | elif user != LAUNCHPAD_ANONYMOUS: | ||
220 | 186 | # ... if the user is already logged in and still can't see | ||
221 | 187 | # the branch, they lose. | ||
222 | 188 | exc = HTTPUnauthorized() | ||
223 | 189 | exc.explanation = "You are logged in as %s." % user | ||
224 | 190 | raise exc | ||
225 | 191 | else: | ||
226 | 192 | # ... otherwise, lets give them a chance to log in with | ||
227 | 193 | # OpenID. | ||
228 | 194 | return self._begin_login(environ, start_response) | ||
229 | 195 | else: | ||
230 | 196 | raise | ||
231 | 197 | if transport_type != BRANCH_TRANSPORT: | ||
232 | 198 | raise HTTPNotFound() | ||
233 | 199 | trail = urlutils.unescape(trail).encode('utf-8') | ||
234 | 200 | trail += trailingSlashCount * '/' | ||
235 | 201 | amount_consumed = len(path) - len(trail) | ||
236 | 202 | consumed = path[:amount_consumed] | ||
237 | 203 | branch_name = consumed.strip('/') | ||
238 | 204 | self.log.info('Using branch: %s', branch_name) | ||
239 | 205 | if trail and not trail.startswith('/'): | ||
240 | 206 | trail = '/' + trail | ||
241 | 207 | environ['PATH_INFO'] = trail | ||
242 | 208 | environ['SCRIPT_NAME'] += consumed.rstrip('/') | ||
243 | 209 | branch_url = urlparse.urljoin( | ||
244 | 210 | config.codehosting.internal_branch_by_id_root, | ||
245 | 211 | branch_id_to_path(info['id'])) | ||
246 | 212 | branch_link = urlparse.urljoin( | ||
247 | 213 | config.codebrowse.launchpad_root, branch_name) | ||
248 | 214 | cachepath = os.path.join( | ||
249 | 215 | config.codebrowse.cachepath, branch_name[1:]) | ||
250 | 216 | if not os.path.isdir(cachepath): | ||
251 | 217 | os.makedirs(cachepath) | ||
252 | 218 | self.log.info('branch_url: %s', branch_url) | ||
253 | 219 | try: | ||
254 | 220 | bzr_branch = branch.Branch.open( | ||
255 | 221 | branch_url, possible_transports=self.get_transports()) | ||
256 | 222 | except errors.NotBranchError, err: | ||
257 | 223 | self.log.warning('Not a branch: %s', err) | ||
258 | 224 | raise HTTPNotFound() | ||
259 | 225 | bzr_branch.lock_read() | ||
260 | 226 | try: | ||
261 | 227 | view = BranchWSGIApp( | ||
262 | 228 | bzr_branch, branch_name, {'cachepath': cachepath}, | ||
263 | 229 | self.graph_cache, branch_link=branch_link, served_url=None) | ||
264 | 230 | return view.app(environ, start_response) | ||
265 | 231 | finally: | ||
266 | 232 | bzr_branch.unlock() | ||
267 | 0 | 233 | ||
268 | === added file 'lib/launchpad_loggerhead/debug.py' | |||
269 | --- lib/launchpad_loggerhead/debug.py 1970-01-01 00:00:00 +0000 | |||
270 | +++ lib/launchpad_loggerhead/debug.py 2010-04-27 03:46:17 +0000 | |||
271 | @@ -0,0 +1,120 @@ | |||
272 | 1 | # Copyright 2009 Canonical Ltd. This software is licensed under the | ||
273 | 2 | # GNU Affero General Public License version 3 (see the file LICENSE). | ||
274 | 3 | |||
275 | 4 | import thread | ||
276 | 5 | import time | ||
277 | 6 | |||
278 | 7 | from paste.request import construct_url | ||
279 | 8 | |||
280 | 9 | |||
281 | 10 | def tabulate(cells): | ||
282 | 11 | """Format a list of lists of strings in a table. | ||
283 | 12 | |||
284 | 13 | The 'cells' are centered. | ||
285 | 14 | |||
286 | 15 | >>> print ''.join(tabulate( | ||
287 | 16 | ... [['title 1', 'title 2'], | ||
288 | 17 | ... ['short', 'rather longer']])) | ||
289 | 18 | title 1 title 2 | ||
290 | 19 | short rather longer | ||
291 | 20 | """ | ||
292 | 21 | widths = {} | ||
293 | 22 | for row in cells: | ||
294 | 23 | for col_index, cell in enumerate(row): | ||
295 | 24 | widths[col_index] = max(len(cell), widths.get(col_index, 0)) | ||
296 | 25 | result = [] | ||
297 | 26 | for row in cells: | ||
298 | 27 | result_row = '' | ||
299 | 28 | for col_index, cell in enumerate(row): | ||
300 | 29 | result_row += cell.center(widths[col_index] + 2) | ||
301 | 30 | result.append(result_row.rstrip() + '\n') | ||
302 | 31 | return result | ||
303 | 32 | |||
304 | 33 | |||
305 | 34 | def threadpool_debug(app): | ||
306 | 35 | """Wrap `app` to provide debugging information about the threadpool state. | ||
307 | 36 | |||
308 | 37 | The returned application will serve debugging information about the state | ||
309 | 38 | of the threadpool at '/thread-debug' -- but only when accessed directly, | ||
310 | 39 | not when accessed through Apache. | ||
311 | 40 | """ | ||
312 | 41 | def wrapped(environ, start_response): | ||
313 | 42 | if ('HTTP_X_FORWARDED_SERVER' in environ | ||
314 | 43 | or environ['PATH_INFO'] != '/thread-debug'): | ||
315 | 44 | environ['lp.timestarted'] = time.time() | ||
316 | 45 | return app(environ, start_response) | ||
317 | 46 | threadpool = environ['paste.httpserver.thread_pool'] | ||
318 | 47 | start_response("200 Ok", []) | ||
319 | 48 | output = [("url", "time running", "time since last activity")] | ||
320 | 49 | now = time.time() | ||
321 | 50 | # Because we're accessing mutable structures without locks here, | ||
322 | 51 | # we're a bit cautious about things looking like we expect -- if a | ||
323 | 52 | # worker doesn't seem fully set up, we just ignore it. | ||
324 | 53 | for worker in threadpool.workers: | ||
325 | 54 | if not hasattr(worker, 'thread_id'): | ||
326 | 55 | continue | ||
327 | 56 | time_started, info = threadpool.worker_tracker.get( | ||
328 | 57 | worker.thread_id, (None, None)) | ||
329 | 58 | if time_started is not None and info is not None: | ||
330 | 59 | real_time_started = info.get( | ||
331 | 60 | 'lp.timestarted', time_started) | ||
332 | 61 | output.append( | ||
333 | 62 | map(str, | ||
334 | 63 | (construct_url(info), | ||
335 | 64 | now - real_time_started, | ||
336 | 65 | now - time_started,))) | ||
337 | 66 | return tabulate(output) | ||
338 | 67 | return wrapped | ||
339 | 68 | |||
340 | 69 | |||
341 | 70 | def change_kill_thread_criteria(application): | ||
342 | 71 | """Interfere with threadpool so that threads are killed for inactivity. | ||
343 | 72 | |||
344 | 73 | The usual rules with paste's threadpool is that a thread that takes longer | ||
345 | 74 | than 'hung_thread_limit' seconds to process a request is considered hung | ||
346 | 75 | and more than 'kill_thread_limit' seconds is killed. | ||
347 | 76 | |||
348 | 77 | Because loggerhead streams its output, how long the entire request takes | ||
349 | 78 | to process depends on things like how fast the user's internet connection | ||
350 | 79 | is. What we'd like to do is kill threads that don't _start_ to produce | ||
351 | 80 | output for 'kill_thread_limit' seconds. | ||
352 | 81 | |||
353 | 82 | What this class actually does is arrange things so that threads that | ||
354 | 83 | produce no output for 'kill_thread_limit' are killed, because that's the | ||
355 | 84 | rule Apache uses when interpreting ProxyTimeout. | ||
356 | 85 | """ | ||
357 | 86 | def wrapped_application(environ, start_response): | ||
358 | 87 | threadpool = environ['paste.httpserver.thread_pool'] | ||
359 | 88 | def reset_timer(): | ||
360 | 89 | """Make this thread safe for another 'kill_thread_limit' seconds. | ||
361 | 90 | |||
362 | 91 | We do this by hacking the threadpool's record of when this thread | ||
363 | 92 | started to pretend that it started right now. Hacky, but it's | ||
364 | 93 | enough to fool paste.httpserver.ThreadPool.kill_hung_threads and | ||
365 | 94 | that's what matters. | ||
366 | 95 | """ | ||
367 | 96 | threadpool.worker_tracker[thread.get_ident()][0] = time.time() | ||
368 | 97 | def response_hook(status, response_headers, exc_info=None): | ||
369 | 98 | # We reset the timer when the HTTP headers are sent... | ||
370 | 99 | reset_timer() | ||
371 | 100 | writer = start_response(status, response_headers, exc_info) | ||
372 | 101 | def wrapped_writer(arg): | ||
373 | 102 | # ... and whenever more output has been generated. | ||
374 | 103 | reset_timer() | ||
375 | 104 | return writer(arg) | ||
376 | 105 | return wrapped_writer | ||
377 | 106 | result = application(environ, response_hook) | ||
378 | 107 | # WSGI allows the application to return an iterable, which could be a | ||
379 | 108 | # generator that does significant processing between successive items, | ||
380 | 109 | # so we should reset the timer between each item. | ||
381 | 110 | # | ||
382 | 111 | # This isn't really necessary as loggerhead doesn't return any | ||
383 | 112 | # non-trivial iterables to the WSGI server. But it's probably better | ||
384 | 113 | to cope with this case to avoid nasty surprises if loggerhead | ||
385 | 114 | # changes. | ||
386 | 115 | def reset_timer_between_items(iterable): | ||
387 | 116 | for item in iterable: | ||
388 | 117 | reset_timer() | ||
389 | 118 | yield item | ||
390 | 119 | return reset_timer_between_items(result) | ||
391 | 120 | return wrapped_application | ||
392 | 0 | 121 | ||
393 | === added file 'lib/launchpad_loggerhead/session.py' | |||
394 | --- lib/launchpad_loggerhead/session.py 1970-01-01 00:00:00 +0000 | |||
395 | +++ lib/launchpad_loggerhead/session.py 2010-04-27 03:46:17 +0000 | |||
396 | @@ -0,0 +1,73 @@ | |||
397 | 1 | # Copyright 2009 Canonical Ltd. This software is licensed under the | ||
398 | 2 | # GNU Affero General Public License version 3 (see the file LICENSE). | ||
399 | 3 | |||
400 | 4 | """Simple paste-y session manager tuned for the needs of launchpad-loggerhead. | ||
401 | 5 | """ | ||
402 | 6 | |||
403 | 7 | import pickle | ||
404 | 8 | |||
405 | 9 | from paste.auth.cookie import AuthCookieHandler, AuthCookieSigner | ||
406 | 10 | |||
407 | 11 | |||
408 | 12 | class MyAuthCookieSigner(AuthCookieSigner): | ||
409 | 13 | """Fix a bug in AuthCookieSigner.""" | ||
410 | 14 | |||
411 | 15 | def sign(self, content): | ||
412 | 16 | # XXX 2008-01-13 Michael Hudson: paste.auth.cookie generates bogus | ||
413 | 17 | # cookies when the value is long: | ||
414 | 18 | # http://trac.pythonpaste.org/pythonpaste/ticket/257. This is fixed | ||
415 | 19 | # now, so when a new version is released and packaged we can remove | ||
416 | 20 | # this class. | ||
417 | 21 | r = AuthCookieSigner.sign(self, content) | ||
418 | 22 | return r.replace('\n', '') | ||
419 | 23 | |||
420 | 24 | |||
421 | 25 | class SessionHandler(object): | ||
422 | 26 | """Middleware that provides a cookie-based session. | ||
423 | 27 | |||
424 | 28 | The session dict is stored, pickled (and HMACed), in a cookie, so don't | ||
425 | 29 | store very much in the session! | ||
426 | 30 | """ | ||
427 | 31 | |||
428 | 32 | def __init__(self, application, session_var, secret=None): | ||
429 | 33 | """Initialize a SessionHandler instance. | ||
430 | 34 | |||
431 | 35 | :param application: This is the wrapped application which will have | ||
432 | 36 | access to the ``environ[session_var]`` dictionary managed by this | ||
433 | 37 | middleware. | ||
434 | 38 | :param session_var: The key under which to store the session | ||
435 | 39 | dictionary in the environment. | ||
436 | 40 | :param secret: A secret value used for signing the cookie. If not | ||
437 | 41 | supplied, a new secret will be used for each instantiation of the | ||
438 | 42 | SessionHandler. | ||
439 | 43 | """ | ||
440 | 44 | self.application = application | ||
441 | 45 | self.cookie_handler = AuthCookieHandler( | ||
442 | 46 | self._process, scanlist=[session_var], | ||
443 | 47 | signer=MyAuthCookieSigner(secret)) | ||
444 | 48 | self.session_var = session_var | ||
445 | 49 | |||
446 | 50 | def __call__(self, environ, start_response): | ||
447 | 51 | # We need to put the request through the cookie handler first, so we | ||
448 | 52 | # can access the validated string in the environ in `_process` below. | ||
449 | 53 | return self.cookie_handler(environ, start_response) | ||
450 | 54 | |||
451 | 55 | def _process(self, environ, start_response): | ||
452 | 56 | """Process a request. | ||
453 | 57 | |||
454 | 58 | AuthCookieHandler takes care of getting the text value of the session | ||
455 | 59 | in and out of the cookie (and validating the text using HMAC) so we | ||
456 | 60 | just need to convert that string to and from a real dictionary using | ||
457 | 61 | pickle. | ||
458 | 62 | """ | ||
459 | 63 | if self.session_var in environ: | ||
460 | 64 | session = pickle.loads(environ[self.session_var]) | ||
461 | 65 | else: | ||
462 | 66 | session = {} | ||
463 | 67 | environ[self.session_var] = session | ||
464 | 68 | def response_hook(status, response_headers, exc_info=None): | ||
465 | 69 | session = environ.pop(self.session_var) | ||
466 | 70 | if session: | ||
467 | 71 | environ[self.session_var] = pickle.dumps(session) | ||
468 | 72 | return start_response(status, response_headers, exc_info) | ||
469 | 73 | return self.application(environ, response_hook) | ||
470 | 0 | 74 | ||
471 | === added directory 'lib/launchpad_loggerhead/static' | |||
472 | === added file 'lib/launchpad_loggerhead/static/robots.txt' | |||
473 | --- lib/launchpad_loggerhead/static/robots.txt 1970-01-01 00:00:00 +0000 | |||
474 | +++ lib/launchpad_loggerhead/static/robots.txt 2010-04-27 03:46:17 +0000 | |||
475 | @@ -0,0 +1,2 @@ | |||
476 | 1 | User-agent: * | ||
477 | 2 | Disallow: / | ||
478 | 0 | 3 | ||
479 | === added file 'scripts/start-loggerhead.py' | |||
480 | --- scripts/start-loggerhead.py 1970-01-01 00:00:00 +0000 | |||
481 | +++ scripts/start-loggerhead.py 2010-04-27 03:46:17 +0000 | |||
482 | @@ -0,0 +1,177 @@ | |||
483 | 1 | #!/usr/bin/python2.5 -S | ||
484 | 2 | # | ||
485 | 3 | # Copyright 2009, 2010 Canonical Ltd. This software is licensed under the | ||
486 | 4 | # GNU Affero General Public License version 3 (see the file LICENSE). | ||
487 | 5 | |||
488 | 6 | import _pythonpath | ||
489 | 7 | |||
490 | 8 | import logging | ||
491 | 9 | import os | ||
492 | 10 | import sys | ||
493 | 11 | |||
494 | 12 | from paste import httpserver | ||
495 | 13 | from paste.deploy.config import PrefixMiddleware | ||
496 | 14 | from paste.httpexceptions import HTTPExceptionHandler | ||
497 | 15 | from paste.request import construct_url | ||
498 | 16 | from paste.translogger import TransLogger | ||
499 | 17 | |||
500 | 18 | from canonical.config import config | ||
501 | 19 | import lp.codehosting | ||
502 | 20 | |||
503 | 21 | LISTEN_HOST = '0.0.0.0' | ||
504 | 22 | LISTEN_PORT = 8080 | ||
505 | 23 | THREADPOOL_WORKERS = 10 | ||
506 | 24 | |||
507 | 25 | |||
508 | 26 | class NoLockingFileHandler(logging.FileHandler): | ||
509 | 27 | """A version of logging.FileHandler that doesn't do its own locking. | ||
510 | 28 | |||
511 | 29 | We experienced occasional hangs in production where gdb-ery on the server | ||
512 | 30 | revealed that we sometimes end up with many threads blocking on the RLock | ||
513 | 31 | held by the logging file handler, and log reading finds that an exception | ||
514 | 32 | managed to kill a thread in an unsafe window for RLock's. | ||
515 | 33 | |||
516 | 34 | Luckily, there's no real reason for us to take a lock during logging as | ||
517 | 35 | each log message translates to one call to .write on a file object, which | ||
518 | 36 | translates to one fwrite call, and it seems that this does enough locking | ||
519 | 37 | itself for our purposes. | ||
520 | 38 | |||
521 | 39 | So this handler just doesn't lock in log message handling. | ||
522 | 40 | """ | ||
523 | 41 | |||
524 | 42 | def acquire(self): | ||
525 | 43 | pass | ||
526 | 44 | |||
527 | 45 | def release(self): | ||
528 | 46 | pass | ||
529 | 47 | |||
530 | 48 | |||
531 | 49 | def setup_logging(home, foreground): | ||
532 | 50 | # i hate that stupid logging config format, so just set up logging here. | ||
533 | 51 | |||
534 | 52 | log_folder = config.codebrowse.log_folder | ||
535 | 53 | if not log_folder: | ||
536 | 54 | log_folder = os.path.join(home, 'logs') | ||
537 | 55 | if not os.path.exists(log_folder): | ||
538 | 56 | os.mkdir(log_folder) | ||
539 | 57 | |||
540 | 58 | f = logging.Formatter( | ||
541 | 59 | '%(levelname)-.3s [%(asctime)s.%(msecs)03d] [%(thread)d] %(name)s: %(message)s', | ||
542 | 60 | '%Y%m%d-%H:%M:%S') | ||
543 | 61 | debug_log = NoLockingFileHandler(os.path.join(log_folder, 'debug.log')) | ||
544 | 62 | debug_log.setLevel(logging.DEBUG) | ||
545 | 63 | debug_log.setFormatter(f) | ||
546 | 64 | if foreground: | ||
547 | 65 | stdout_log = logging.StreamHandler(sys.stdout) | ||
548 | 66 | stdout_log.setLevel(logging.DEBUG) | ||
549 | 67 | stdout_log.setFormatter(f) | ||
550 | 68 | f = logging.Formatter('[%(asctime)s.%(msecs)03d] %(message)s', | ||
551 | 69 | '%Y%m%d-%H:%M:%S') | ||
552 | 70 | access_log = NoLockingFileHandler(os.path.join(log_folder, 'access.log')) | ||
553 | 71 | access_log.setLevel(logging.INFO) | ||
554 | 72 | access_log.setFormatter(f) | ||
555 | 73 | |||
556 | 74 | logging.getLogger('').setLevel(logging.DEBUG) | ||
557 | 75 | logging.getLogger('').addHandler(debug_log) | ||
558 | 76 | logging.getLogger('wsgi').addHandler(access_log) | ||
559 | 77 | |||
560 | 78 | if foreground: | ||
561 | 79 | logging.getLogger('').addHandler(stdout_log) | ||
562 | 80 | else: | ||
563 | 81 | class S(object): | ||
564 | 82 | def write(self, str): | ||
565 | 83 | logging.getLogger().error(str.rstrip('\n')) | ||
566 | 84 | def flush(self): | ||
567 | 85 | pass | ||
568 | 86 | sys.stderr = S() | ||
569 | 87 | |||
570 | 88 | |||
571 | 89 | |||
572 | 90 | foreground = False | ||
573 | 91 | if len(sys.argv) > 1: | ||
574 | 92 | if sys.argv[1] == '-f': | ||
575 | 93 | foreground = True | ||
576 | 94 | |||
577 | 95 | home = os.path.realpath(os.path.dirname(__file__)) | ||
578 | 96 | pidfile = os.path.join(home, 'loggerhead.pid') | ||
579 | 97 | |||
580 | 98 | if not foreground: | ||
581 | 99 | sys.stderr.write('\n') | ||
582 | 100 | sys.stderr.write('Launching loggerhead into the background.\n') | ||
583 | 101 | sys.stderr.write('PID file: %s\n' % (pidfile,)) | ||
584 | 102 | sys.stderr.write('\n') | ||
585 | 103 | |||
586 | 104 | from loggerhead.daemon import daemonize | ||
587 | 105 | daemonize(pidfile, home) | ||
588 | 106 | |||
589 | 107 | setup_logging(home, foreground=foreground) | ||
590 | 108 | |||
591 | 109 | log = logging.getLogger('loggerhead') | ||
592 | 110 | log.info('Starting up...') | ||
593 | 111 | |||
594 | 112 | log.info('Loading the bzr plugins...') | ||
595 | 113 | from bzrlib.plugin import load_plugins | ||
596 | 114 | load_plugins() | ||
597 | 115 | |||
598 | 116 | import bzrlib.plugins | ||
599 | 117 | if getattr(bzrlib.plugins, 'loom', None) is None: | ||
600 | 118 | log.error('Loom plugin loading failed.') | ||
601 | 119 | |||
602 | 120 | from launchpad_loggerhead.debug import ( | ||
603 | 121 | change_kill_thread_criteria, threadpool_debug) | ||
604 | 122 | from launchpad_loggerhead.app import RootApp | ||
605 | 123 | from launchpad_loggerhead.session import SessionHandler | ||
606 | 124 | |||
607 | 125 | SESSION_VAR = 'lh.session' | ||
608 | 126 | |||
609 | 127 | secret = open(os.path.join(config.root, config.codebrowse.secret_path)).read() | ||
610 | 128 | |||
611 | 129 | app = RootApp(SESSION_VAR) | ||
612 | 130 | app = HTTPExceptionHandler(app) | ||
613 | 131 | app = SessionHandler(app, SESSION_VAR, secret) | ||
614 | 132 | def log_on_request_start(app): | ||
615 | 133 | def wrapped(environ, start_response): | ||
616 | 134 | log = logging.getLogger('loggerhead') | ||
617 | 135 | log.info("Starting to process %s", construct_url(environ)) | ||
618 | 136 | return app(environ, start_response) | ||
619 | 137 | return wrapped | ||
620 | 138 | app = log_on_request_start(app) | ||
621 | 139 | app = PrefixMiddleware(app) | ||
622 | 140 | app = TransLogger(app) | ||
623 | 141 | app = threadpool_debug(app) | ||
624 | 142 | |||
625 | 143 | def set_scheme(app): | ||
626 | 144 | """Set wsgi.url_scheme in the environment correctly. | ||
627 | 145 | |||
628 | 146 | We serve requests that originated from both http and https, and | ||
629 | 147 | distinguish between them by adding a header in the https Apache config. | ||
630 | 148 | """ | ||
631 | 149 | def wrapped(environ, start_response): | ||
632 | 150 | environ['wsgi.url_scheme'] = environ.pop( | ||
633 | 151 | 'HTTP_X_FORWARDED_SCHEME', 'http') | ||
634 | 152 | return app(environ, start_response) | ||
635 | 153 | return wrapped | ||
636 | 154 | app = set_scheme(app) | ||
637 | 155 | app = change_kill_thread_criteria(app) | ||
638 | 156 | |||
639 | 157 | try: | ||
640 | 158 | httpserver.serve( | ||
641 | 159 | app, host=LISTEN_HOST, port=LISTEN_PORT, | ||
642 | 160 | threadpool_workers=THREADPOOL_WORKERS, | ||
643 | 161 | threadpool_options={ | ||
644 | 162 | # Kill threads after 300 seconds. This is insanely high, but | ||
645 | 163 | # lower enough than the default (1800 seconds!) that evidence | ||
646 | 164 | # suggests it will be hit occasionally, and there's very little | ||
647 | 165 | # chance of it having negative consequences. | ||
648 | 166 | 'kill_thread_limit': 300, | ||
649 | 167 | # Check for threads that should be killed every 10 requests. The | ||
650 | 168 | # default is every 100, which is easily long enough for things to | ||
651 | 169 | # gum up completely in between checks. | ||
652 | 170 | 'hung_check_period': 10, | ||
653 | 171 | }) | ||
654 | 172 | finally: | ||
655 | 173 | log.info('Shutdown.') | ||
656 | 174 | try: | ||
657 | 175 | os.remove(pidfile) | ||
658 | 176 | except OSError: | ||
659 | 177 | pass | ||
660 | 0 | 178 | ||
661 | === added file 'scripts/stop-loggerhead.py' | |||
662 | --- scripts/stop-loggerhead.py 1970-01-01 00:00:00 +0000 | |||
663 | +++ scripts/stop-loggerhead.py 2010-04-27 03:46:17 +0000 | |||
664 | @@ -0,0 +1,33 @@ | |||
665 | 1 | #!/usr/bin/python2.5 -S | ||
666 | 2 | # | ||
667 | 3 | # Copyright 2009, 2010 Canonical Ltd. This software is licensed under the | ||
668 | 4 | # GNU Affero General Public License version 3 (see the file LICENSE). | ||
669 | 5 | |||
670 | 6 | import _pythonpath | ||
671 | 7 | |||
672 | 8 | import os | ||
673 | 9 | import sys | ||
674 | 10 | |||
675 | 11 | home = os.path.realpath(os.path.dirname(__file__)) | ||
676 | 12 | pidfile = os.path.join(home, 'loggerhead.pid') | ||
677 | 13 | |||
678 | 14 | try: | ||
679 | 15 | f = open(pidfile, 'r') | ||
680 | 16 | except IOError, e: | ||
681 | 17 | print 'No pid file found.' | ||
682 | 18 | sys.exit(1) | ||
683 | 19 | |||
684 | 20 | pid = int(f.readline()) | ||
685 | 21 | |||
686 | 22 | try: | ||
687 | 23 | os.kill(pid, 0) | ||
688 | 24 | except OSError, e: | ||
689 | 25 | print 'Stale pid file; server is not running.' | ||
690 | 26 | sys.exit(1) | ||
691 | 27 | |||
692 | 28 | |||
693 | 29 | print 'Shutting down previous server @ pid %d.' % (pid,) | ||
694 | 30 | |||
695 | 31 | |||
696 | 32 | import signal | ||
697 | 33 | os.kill(pid, signal.SIGTERM) | ||
698 | 0 | 34 | ||
699 | === modified file 'utilities/sourcedeps.conf' | |||
700 | --- utilities/sourcedeps.conf 2010-04-21 12:30:48 +0000 | |||
701 | +++ utilities/sourcedeps.conf 2010-04-27 03:46:17 +0000 | |||
702 | @@ -5,7 +5,6 @@ | |||
703 | 5 | bzr-svn lp:~launchpad-pqm/bzr-svn/devel;revno=2708 | 5 | bzr-svn lp:~launchpad-pqm/bzr-svn/devel;revno=2708 |
704 | 6 | cscvs lp:~launchpad-pqm/launchpad-cscvs/devel;revno=432 | 6 | cscvs lp:~launchpad-pqm/launchpad-cscvs/devel;revno=432 |
705 | 7 | dulwich lp:~launchpad-pqm/dulwich/devel;revno=418 | 7 | dulwich lp:~launchpad-pqm/dulwich/devel;revno=418 |
706 | 8 | launchpad-loggerhead lp:~launchpad-pqm/launchpad-loggerhead/devel;revno=54 | ||
707 | 9 | loggerhead lp:~launchpad-pqm/loggerhead/devel;revno=174 | 8 | loggerhead lp:~launchpad-pqm/loggerhead/devel;revno=174 |
708 | 10 | lpreview lp:~launchpad-pqm/bzr-lpreview/devel;revno=23 | 9 | lpreview lp:~launchpad-pqm/bzr-lpreview/devel;revno=23 |
709 | 11 | mailman lp:~launchpad-pqm/mailman/2.1;revno=976 | 10 | mailman lp:~launchpad-pqm/mailman/2.1;revno=976 |