Launchpad itself

Merge lp:~wgrant/launchpad/bug-677270 into lp:launchpad

bug-677270
Merge into devel

Proposed by William Grant on 2015-12-09

Status:

Merged

Merged at revision:

17866

Proposed branch:

lp:~wgrant/launchpad/bug-677270

Merge into:

lp:launchpad

Diff against target:

152 lines (+57/-8)

3 files modified

lib/lp/services/librarian/client.py (+6/-3)
lib/lp/services/librarianserver/db.py (+20/-2)
lib/lp/services/librarianserver/tests/test_web.py (+31/-3)

To merge this branch:

bzr merge lp:~wgrant/launchpad/bug-677270

High

Fix Released

Link a bug report

Reviewer	Review Type	Date Requested	Status
Colin Watson (community)		2015-12-09	Approve on 2015-12-10
Review via email: mp+279974@code.launchpad.net

Commit message

Canonicalise path encoding before checking a librarian TimeLimitedToken.

Description of the change

Canonicalise path encoding before checking a librarian TimeLimitedToken, and leave tildes unescaped in librarian URLs to appease RFC 3986 and Chromium.

I'm also tempted to whitelist "+", as it has no special meaning in the URL path. Thoughts welcome.

Revision history for this message

Colin Watson (cjwatson) wrote on 2015-12-10:

IRC discussion has convinced me that we should whitelist "+"; doing so is harmless and would probably make e.g. dget behave more gracefully. Otherwise this looks good to me.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Barki Mustapha

Celso Providelo

Christian Reis

Christy Awad

Colin Watson

Harpianto,ANDI

James Troup

John A Meinel

Kevin bush

Launchpad code reviewers

Launchpad code reviewers from Canonical

Matthew Tanner

Maximiliano Bertacchini

Oguz Ersoz

Simon Brakhane

Ubuntu-BR DevOps

William Grant

alexsio nau

alhawiti

api.ng

pedro cavazos

todaioan

wenjingwen

to status/vote changes:

Tzaddi

Tzaddi Belding

 === modified file 'lib/lp/services/librarian/client.py'
 --- lib/lp/services/librarian/client.py	2015-07-08 16:05:11 +0000
 +++ lib/lp/services/librarian/client.py	2015-12-10 00:38:11 +0000
@@ -53,9 +53,12 @@
  def url_path_quote(filename):
      """Quote `filename` for use in a URL."""
--    # XXX RobertCollins 2004-09-21: Perhaps filenames with / in them
--    # should be disallowed?
--    return urllib.quote(filename).replace('/', '%2F')
++    # RFC 3986 says ~ should not be generated escaped, but urllib.quote
++    # predates it. Additionally, + is safe to use unescaped in paths and is
++    # frequently used in Debian versions, so leave it alone.
++    #
++    # This needs to match Library.getAlias' TimeLimitedToken handling.
++    return urllib.quote(filename, safe='/~+')
  def get_libraryfilealias_download_path(aliasID, filename):
 === modified file 'lib/lp/services/librarianserver/db.py'
 --- lib/lp/services/librarianserver/db.py	2014-09-02 02:03:37 +0000
 +++ lib/lp/services/librarianserver/db.py	2015-12-10 00:38:11 +0000
@@ -9,9 +9,11 @@
+     ]
  import hashlib
++import urllib
  from storm.expr import (
      And,
++    Or,
      SQL,
+     )
@@ -56,13 +58,29 @@
          """
          restricted = self.restricted
          if token and path:
--            # with a token and a path we may be able to serve restricted files
++            # With a token and a path we may be able to serve restricted files
              # on the public port.
++            #
++            # The URL-encoding of the path may have changed somewhere
++            # along the line, so reencode it canonically. LFA.filename
++            # can't contain slashes, so they're safe to leave unencoded.
++            # And urllib.quote erroneously excludes ~ from its safe set,
++            # while RFC 3986 says it should be unescaped and Chromium
++            # forcibly decodes it in any URL that it sees.
++            #
++            # This needs to match url_path_quote.
++            plain_tilde_path = urllib.quote(urllib.unquote(path), safe='/~+')
++            # XXX wgrant 2015-12-09: We used to generate URLs with
++            # escaped tildes, so support those until the tokens are all
++            # expired.
++            encoded_tilde_path = urllib.quote(urllib.unquote(path), safe='/')
              store = session_store()
              token_found = store.find(TimeLimitedToken,
                  SQL("age(created) < interval '1 day'"),
                  TimeLimitedToken.token == hashlib.sha256(token).hexdigest(),
--                TimeLimitedToken.path == path).is_empty()
++                Or(
++                    TimeLimitedToken.path == plain_tilde_path,
++                    TimeLimitedToken.path == encoded_tilde_path)).is_empty()
              store.reset()
              if token_found:
                  raise LookupError("Token stale/pruned/path mismatch")
 === modified file 'lib/lp/services/librarianserver/tests/test_web.py'
 --- lib/lp/services/librarianserver/tests/test_web.py	2015-10-14 15:22:01 +0000
 +++ lib/lp/services/librarianserver/tests/test_web.py	2015-12-10 00:38:11 +0000
@@ -15,6 +15,8 @@
  from lazr.uri import URI
  import pytz
  from storm.expr import SQL
++import testtools
++from testtools.matchers import EndsWith
  import transaction
  from zope.component import getUtility
@@ -48,7 +50,7 @@
      return str(parsed.replace(path=parsed.path.replace(old, new)))
--class LibrarianWebTestCase(unittest.TestCase):
++class LibrarianWebTestCase(testtools.TestCase):
      """Test the librarian's web interface."""
      layer = LaunchpadFunctionalLayer
      dbuser = 'librarian'
@@ -237,13 +239,13 @@
          self.failUnlessEqual(
              last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
--    def get_restricted_file_and_public_url(self):
++    def get_restricted_file_and_public_url(self, filename='sample'):
          # Use a regular LibrarianClient to ensure we speak to the
          # nonrestricted port on the librarian which is where secured
          # restricted files are served from.
          client = LibrarianClient()
          fileAlias = client.addFile(
--            'sample', 12, StringIO('a' * 12), contentType='text/plain')
++            filename, 12, StringIO('a' * 12), contentType='text/plain')
          # Note: We're deliberately using the wrong url here: we should be
          # passing secure=True to getURLForAlias, but to use the returned URL
          # we would need a wildcard DNS facility patched into urlopen; instead
@@ -333,6 +335,30 @@
          finally:
              fileObj.close()
++    def test_restricted_with_token_encoding(self):
++        fileAlias, url = self.get_restricted_file_and_public_url('foo~%')
++        self.assertThat(url, EndsWith('/foo~%25'))
++
++        # We have the base url for a restricted file; grant access to it
++        # for a short time.
++        token = TimeLimitedToken.allocate(url)
++
++        # Now we should be able to access the file.
++        fileObj = urlopen(url + "?token=%s" % token)
++        try:
++            self.assertEqual("a" * 12, fileObj.read())
++        finally:
++            fileObj.close()
++
++        # The token is valid even if the filename is encoded differently.
++        mangled_url = url.replace('~', '%7E')
++        self.assertNotEqual(mangled_url, url)
++        fileObj = urlopen(mangled_url + "?token=%s" % token)
++        try:
++            self.assertEqual("a" * 12, fileObj.read())
++        finally:
++            fileObj.close()
++
      def test_restricted_with_expired_token(self):
          fileAlias, url = self.get_restricted_file_and_public_url()
          # We have the base url for a restricted file; grant access to it
@@ -384,6 +410,7 @@
      layer = LaunchpadZopelessLayer
      def setUp(self):
++        super(LibrarianZopelessWebTestCase, self).setUp()
          switch_dbuser(config.librarian.dbuser)
      def commit(self):
@@ -409,6 +436,7 @@
      layer = LaunchpadZopelessLayer
      def setUp(self):
++        super(DeletedContentTestCase, self).setUp()
          switch_dbuser(config.librarian.dbuser)
      def test_deletedContentNotFound(self):