Merge lp:~bmerry/duplicity/pydrive-id-cache into lp:~duplicity-team/duplicity/0.7-series

Proposed by Bruce Merry
Status: Merged
Merged at revision: 1129
Proposed branch: lp:~bmerry/duplicity/pydrive-id-cache
Merge into: lp:~duplicity-team/duplicity/0.7-series
Diff against target: 152 lines (+85/-23)
1 file modified
duplicity/backends/pydrivebackend.py (+85/-23)
To merge this branch: bzr merge lp:~bmerry/duplicity/pydrive-id-cache
Reviewer | Review Type | Date Requested | Status
duplicity-team | | | Pending
Review via email: mp+270903@code.launchpad.net

Description of the change

Second attempt: I've now merged with the master branch. The merge did some strange things (it undid a whole bunch of my changes, which I had to restore in the following commit), so please take a quick look to make sure that this merge is sane (I don't normally use bzr, so I might be doing it wrong).

This fixes an issue that a number of users (including me) have been having, in which duplicity creates files with duplicate filenames on Google Drive. The backend now keeps a runtime cache of filename-to-object-ID mappings, so that once it has uploaded an object it won't be fooled by weakly consistent directory listings. I've been using it for a while with no further duplicate-filename issues, and another user has reported that it fixed his issues as well.

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'duplicity/backends/pydrivebackend.py'
2--- duplicity/backends/pydrivebackend.py 2015-09-06 14:48:27 +0000
3+++ duplicity/backends/pydrivebackend.py 2015-09-13 17:55:46 +0000
4@@ -20,6 +20,7 @@
5 import os
6
7 import duplicity.backend
8+from duplicity import log
9 from duplicity.errors import BackendException
10
11
12@@ -35,6 +36,7 @@
13 from oauth2client.client import SignedJwtAssertionCredentials
14 from pydrive.auth import GoogleAuth
15 from pydrive.drive import GoogleDrive
16+ from pydrive.files import FileNotUploadedError
17 except ImportError:
18 raise BackendException('PyDrive backend requires PyDrive installation'
19 'Please read the manpage to fix.')
20@@ -73,49 +75,109 @@
21 folder.Upload()
22 parent_folder_id = folder['id']
23 self.folder = parent_folder_id
24-
25- def FilesList(self):
26- return self.drive.ListFile({'q': "'" + self.folder + "' in parents and trashed=false"}).GetList()
27+ self.id_cache = {}
28+
29+ def file_by_name(self, filename):
30+ from pydrive.files import ApiRequestError
31+ if filename in self.id_cache:
32+ # It might since have been locally moved, renamed or deleted, so we
33+ # need to validate the entry.
34+ file_id = self.id_cache[filename]
35+ drive_file = self.drive.CreateFile({'id': file_id})
36+ try:
37+ if drive_file['title'] == filename and not drive_file['labels']['trashed']:
38+ for parent in drive_file['parents']:
39+ if parent['id'] == self.folder:
40+ log.Info("PyDrive backend: found file '%s' with id %s in ID cache" % (filename, file_id))
41+ return drive_file
42+ except ApiRequestError as error:
43+ # A 404 occurs if the ID is no longer valid
44+ if error.args[0].resp.status != 404:
45+ raise
46+ # If we get here, the cache entry is invalid
47+ log.Info("PyDrive backend: invalidating '%s' (previously ID %s) from ID cache" % (filename, file_id))
48+ del self.id_cache[filename]
49+
50+ # Not found in the cache, so use directory listing. This is less
51+ # reliable because there is no strong consistency.
52+ q = "title='%s' and '%s' in parents and trashed=false" % (filename, self.folder)
53+ fields = 'items(title,id,fileSize,downloadUrl,exportLinks),nextPageToken'
54+ flist = self.drive.ListFile({'q': q, 'fields': fields}).GetList()
55+ if len(flist) > 1:
56+ log.FatalError(_("PyDrive backend: multiple files called '%s'.") % (filename,))
57+ elif flist:
58+ file_id = flist[0]['id']
59+ self.id_cache[filename] = flist[0]['id']
60+ log.Info("PyDrive backend: found file '%s' with id %s on server, adding to cache" % (filename, file_id))
61+ return flist[0]
62+ log.Info("PyDrive backend: file '%s' not found in cache or on server" % (filename,))
63+ return None
64
65 def id_by_name(self, filename):
66- try:
67- return next(item for item in self.FilesList() if item['title'] == filename)['id']
68- except:
69+ drive_file = self.file_by_name(filename)
70+ if drive_file is None:
71 return ''
72+ else:
73+ return drive_file['id']
74
75 def _put(self, source_path, remote_filename):
76- drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
77+ drive_file = self.file_by_name(remote_filename)
78+ if drive_file is None:
79+ # No existing file, make a new one
80+ drive_file = self.drive.CreateFile({'title': remote_filename, 'parents': [{"kind": "drive#fileLink", "id": self.folder}]})
81+ log.Info("PyDrive backend: creating new file '%s'" % (remote_filename,))
82+ else:
83+ log.Info("PyDrive backend: replacing existing file '%s' with id '%s'" % (
84+ remote_filename, drive_file['id']))
85 drive_file.SetContentFile(source_path.name)
86 drive_file.Upload()
87+ self.id_cache[remote_filename] = drive_file['id']
88
89 def _get(self, remote_filename, local_path):
90- drive_file = self.drive.CreateFile({'id': self.id_by_name(remote_filename)})
91+ drive_file = self.file_by_name(remote_filename)
92 drive_file.GetContentFile(local_path.name)
93
94 def _list(self):
95- return [item['title'] for item in self.FilesList()]
96+ drive_files = self.drive.ListFile({
97+ 'q': "'" + self.folder + "' in parents and trashed=false",
98+ 'fields': 'items(title,id),nextPageToken'}).GetList()
99+ filenames = set(item['title'] for item in drive_files)
100+ # Check the cache as well. A file might have just been uploaded but
101+ # not yet appear in the listing.
102+ # Note: do not use iterkeys() here, because file_by_name will modify
103+ # the cache if it finds invalid entries.
104+ for filename in self.id_cache.keys():
105+ if (filename not in filenames) and (self.file_by_name(filename) is not None):
106+ filenames.add(filename)
107+ return list(filenames)
108
109 def _delete(self, filename):
110 file_id = self.id_by_name(filename)
111- drive_file = self.drive.CreateFile({'id': file_id})
112- drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
113-
114- def _delete_list(self, filename_list):
115- to_remove = set(filename_list)
116- for item in self.FilesList():
117- if item['title'] not in to_remove:
118- continue
119- file_id = item['id']
120- drive_file = self.drive.CreateFile({'id': file_id})
121- drive_file.auth.service.files().delete(fileId=drive_file['id']).execute()
122+ if file_id != '':
123+ self.drive.auth.service.files().delete(fileId=file_id).execute()
124+ else:
125+ log.Warn("File '%s' does not exist while trying to delete it" % (filename,))
126
127 def _query(self, filename):
128- try:
129- size = int((item for item in self.FilesList() if item['title'] == filename).next()['fileSize'])
130- except:
131+ drive_file = self.file_by_name(filename)
132+ if drive_file is None:
133 size = -1
134+ else:
135+ size = int(drive_file['fileSize'])
136 return {'size': size}
137
138+ def _error_code(self, operation, error):
139+ from pydrive.files import ApiRequestError, FileNotUploadedError
140+ if isinstance(error, FileNotUploadedError):
141+ return log.ErrorCode.backend_not_found
142+ elif isinstance(error, ApiRequestError):
143+ http_status = error.args[0].resp.status
144+ if http_status == 404:
145+ return log.ErrorCode.backend_not_found
146+ elif http_status == 403:
147+ return log.ErrorCode.backend_permission_denied
148+ return log.ErrorCode.backend_error
149+
150 duplicity.backend.register_backend('pydrive', PyDriveBackend)
151 """ pydrive is an alternate way to access gdocs """
152 duplicity.backend.register_backend('pydrive+gdocs', PyDriveBackend)

Subscribers

People subscribed via source and target branches