Merge lp:~wgrant/launchpad/librarian-missing-500 into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 17877
Proposed branch: lp:~wgrant/launchpad/librarian-missing-500
Merge into: lp:launchpad
Diff against target: 142 lines (+61/-24)
2 files modified
lib/lp/services/librarianserver/tests/test_web.py (+36/-0)
lib/lp/services/librarianserver/web.py (+25/-24)
To merge this branch: bzr merge lp:~wgrant/launchpad/librarian-missing-500
Reviewer Review Type Date Requested Status
Colin Watson (community) Approve
Review via email: mp+281370@code.launchpad.net

Commit message

Fix Librarian to generate non-cachable 500s on missing storage files.

Description of the change

Fix Librarian to generate non-cachable 500s on missing storage files.

Previously we would return a 404 that was cachable for a year, which is silly for what is usually a temporary (otherwise disastrous) internal error.

To post a comment you must log in.
Revision history for this message
Colin Watson (cjwatson) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/librarianserver/tests/test_web.py'
2--- lib/lp/services/librarianserver/tests/test_web.py 2015-12-09 06:24:43 +0000
3+++ lib/lp/services/librarianserver/tests/test_web.py 2015-12-27 04:00:12 +0000
4@@ -5,6 +5,7 @@
5 from datetime import datetime
6 import hashlib
7 import httplib
8+import os
9 import unittest
10 from urllib2 import (
11 HTTPError,
12@@ -37,6 +38,7 @@
13 LibraryFileAlias,
14 TimeLimitedToken,
15 )
16+from lp.services.librarianserver.storage import LibrarianStorage
17 from lp.testing.dbuser import switch_dbuser
18 from lp.testing.layers import (
19 LaunchpadFunctionalLayer,
20@@ -239,6 +241,40 @@
21 self.failUnlessEqual(
22 last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
23
24+ def test_missing_storage(self):
25+ # When a file exists in the DB but is missing from disk, a 404
26+ # is just confusing. It's an internal error, so 500 instead.
27+ client = LibrarianClient()
28+
29+ # Upload a file so we can retrieve it.
30+ sample_data = 'blah'
31+ file_alias_id = client.addFile(
32+ 'sample', len(sample_data), StringIO(sample_data),
33+ contentType='text/plain')
34+ url = client.getURLForAlias(file_alias_id)
35+
36+ # Change the date_created to a known value that doesn't match
37+ # the disk timestamp. The timestamp on disk cannot be trusted.
38+ file_alias = IMasterStore(LibraryFileAlias).get(
39+ LibraryFileAlias, file_alias_id)
40+
41+ # Commit so the file is available from the Librarian.
42+ self.commit()
43+
44+ # Fetch the file via HTTP.
45+ urlopen(url)
46+
47+ # Delete the on-disk file.
48+ storage = LibrarianStorage(config.librarian_server.root, None)
49+ os.remove(storage._fileLocation(file_alias.contentID))
50+
51+ # The URL now 500s, since the DB says it should exist.
52+ exception = self.assertRaises(HTTPError, urlopen, url)
53+ self.assertEqual(500, exception.code)
54+ self.assertIn('Server', exception.info())
55+ self.assertNotIn('Last-Modified', exception.info())
56+ self.assertNotIn('Cache-Control', exception.info())
57+
58 def get_restricted_file_and_public_url(self, filename='sample'):
59 # Use a regular LibrarianClient to ensure we speak to the
60 # nonrestricted port on the librarian which is where secured
61
62=== modified file 'lib/lp/services/librarianserver/web.py'
63--- lib/lp/services/librarianserver/web.py 2015-07-10 02:42:42 +0000
64+++ lib/lp/services/librarianserver/web.py 2015-12-27 04:00:12 +0000
65@@ -176,25 +176,25 @@
66 % (dbfilename.encode('utf-8'), filename))
67 return fourOhFour
68
69- if not restricted:
70- # Set our caching headers. Librarian files can be cached forever.
71- request.setHeader('Cache-Control', 'max-age=31536000, public')
72- else:
73- # Restricted files require revalidation every time. For now,
74- # until the deployment details are completely reviewed, the
75- # simplest, most cautious approach is taken: no caching permited.
76- request.setHeader('Cache-Control', 'max-age=0, private')
77-
78- if self.storage.hasFile(dbcontentID) or self.upstreamHost is None:
79+ if self.storage.hasFile(dbcontentID):
80 # XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
81 # stored as part of a file's metadata this logic will be replaced.
82 encoding, mimetype = guess_librarian_encoding(filename, mimetype)
83+ # Set our caching headers. Public Librarian files can be
84+ # cached forever, while private ones mustn't be at all.
85+ request.setHeader(
86+ 'Cache-Control',
87+ 'max-age=31536000, public'
88+ if not restricted else 'max-age=0, private')
89 return File(
90 mimetype, encoding, date_created,
91 self.storage._fileLocation(dbcontentID))
92- else:
93+ elif self.upstreamHost is not None:
94 return proxy.ReverseProxyResource(self.upstreamHost,
95 self.upstreamPort, request.path)
96+ else:
97+ raise AssertionError(
98+ "Content %d missing from storage." % dbcontentID)
99
100 @defer.inlineCallbacks
101 def _cb_getFileAlias_swift(
102@@ -212,26 +212,27 @@
103 % (dbfilename.encode('utf-8'), filename))
104 defer.returnValue(fourOhFour)
105
106- if not restricted:
107- # Set our caching headers. Librarian files can be cached forever.
108- request.setHeader('Cache-Control', 'max-age=31536000, public')
109- else:
110- # Restricted files require revalidation every time. For now,
111- # until the deployment details are completely reviewed, the
112- # simplest, most cautious approach is taken: no caching permited.
113- request.setHeader('Cache-Control', 'max-age=0, private')
114-
115 stream = yield self.storage.open(dbcontentID)
116- if stream is not None or self.upstreamHost is None:
117+ if stream is not None:
118 # XXX: Brad Crittenden 2007-12-05 bug=174204: When encodings are
119 # stored as part of a file's metadata this logic will be replaced.
120 encoding, mimetype = guess_librarian_encoding(filename, mimetype)
121- defer.returnValue(File_swift(
122- mimetype, encoding, date_created, stream, size))
123- else:
124+ file = File_swift(mimetype, encoding, date_created, stream, size)
125+ assert file.exists
126+ # Set our caching headers. Public Librarian files can be
127+ # cached forever, while private ones mustn't be at all.
128+ request.setHeader(
129+ 'Cache-Control',
130+ 'max-age=31536000, public'
131+ if not restricted else 'max-age=0, private')
132+ defer.returnValue(file)
133+ elif self.upstreamHost is not None:
134 defer.returnValue(
135 proxy.ReverseProxyResource(
136 self.upstreamHost, self.upstreamPort, request.path))
137+ else:
138+ raise AssertionError(
139+ "Content %d missing from storage." % dbcontentID)
140
141 def render_GET(self, request):
142 return defaultResource.render(request)