Merge lp:~carlos-abalde/duplicity/google-docs into lp:duplicity/0.6

Proposed by Carlos
Status: Merged
Merged at revision: 770
Proposed branch: lp:~carlos-abalde/duplicity/google-docs
Merge into: lp:duplicity/0.6
Diff against target: 331 lines (+256/-1)
7 files modified
dist/makedist (+1/-0)
duplicity.1 (+3/-1)
duplicity/backend.py (+1/-0)
duplicity/backends/gdocsbackend.py (+247/-0)
duplicity/commandline.py (+1/-0)
po/POTFILES.in (+1/-0)
testing/config.py.tmpl (+2/-0)
To merge this branch: bzr merge lp:~carlos-abalde/duplicity/google-docs
Reviewer Review Type Date Requested Status
duplicity-team Pending
Review via email: mp+70360@code.launchpad.net

Description of the change

New backend implementation storing backups on Google Docs folders.

It's not very fast, but it's handy for personal backups due to the low cost of Google Storage. It has been tested for about one week backing up about 15 GB without problems. It supports captchas, 2-step authentication and folders. It depends on the Google Data API Python libraries (usually the python-gdata package).

An example backend URL is gdocs://carlos.abalde:<email address hidden>/duplicity/personal/projects

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'dist/makedist'
2--- dist/makedist 2011-06-12 14:18:45 +0000
3+++ dist/makedist 2011-08-03 20:04:34 +0000
4@@ -122,6 +122,7 @@
5 "backends/tahoebackend.py",
6 "backends/u1backend.py",
7 "backends/webdavbackend.py",
8+ "backends/gdocsbackend.py",
9 ]:
10 assert not os.system("cp %s/%s %s/src/backends" %
11 (SourceDir, filename, tardir)), filename
12
13=== modified file 'duplicity.1'
14--- duplicity.1 2011-07-16 18:37:47 +0000
15+++ duplicity.1 2011-08-03 20:04:34 +0000
16@@ -63,7 +63,7 @@
17 Duplicity incrementally backs up files and directory
18 by encrypting tar-format volumes with GnuPG and uploading them to a
19 remote (or local) file server. Currently local, ftp, ssh/scp, rsync,
20-WebDAV, WebDAVs, HSi and Amazon S3 backends are available.
21+WebDAV, WebDAVs, Google Docs, HSi and Amazon S3 backends are available.
22 Because duplicity uses
23 librsync, the incremental archives are space efficient and only record
24 the parts of files that have changed since the last backup. Currently
25@@ -826,6 +826,8 @@
26 webdav://user[:password]@other.host/some_dir
27 .PP
28 webdavs://user[:password]@other.host/some_dir
29+.PP
30+gdocs://user[:password]@other.host/some_dir
31
32 .RE
33
34
35=== modified file 'duplicity/backend.py'
36--- duplicity/backend.py 2011-06-17 06:21:42 +0000
37+++ duplicity/backend.py 2011-08-03 20:04:34 +0000
38@@ -183,6 +183,7 @@
39 'u1',
40 'scp', 'ssh', 'sftp',
41 'webdav', 'webdavs',
42+ 'gdocs',
43 'http', 'https',
44 'imap', 'imaps']
45
46
47=== added file 'duplicity/backends/gdocsbackend.py'
48--- duplicity/backends/gdocsbackend.py 1970-01-01 00:00:00 +0000
49+++ duplicity/backends/gdocsbackend.py 2011-08-03 20:04:34 +0000
50@@ -0,0 +1,247 @@
51+# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
52+#
53+# Copyright 2011 Carlos Abalde <carlos.abalde@gmail.com>
54+#
55+# This file is part of duplicity.
56+#
57+# Duplicity is free software; you can redistribute it and/or modify it
58+# under the terms of the GNU General Public License as published by the
59+# Free Software Foundation; either version 2 of the License, or (at your
60+# option) any later version.
61+#
62+# Duplicity is distributed in the hope that it will be useful, but
63+# WITHOUT ANY WARRANTY; without even the implied warranty of
64+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
65+# General Public License for more details.
66+#
67+# You should have received a copy of the GNU General Public License
68+# along with duplicity; if not, write to the Free Software Foundation,
69+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
70+
71+import os.path
72+import string
73+import urllib;
74+
75+import duplicity.backend
76+from duplicity.backend import retry
77+from duplicity import log
78+from duplicity.errors import * #@UnusedWildImport
79+
80+class GDocsBackend(duplicity.backend.Backend):
81+ """Connect to remote store using Google Google Documents List API"""
82+
83+ ROOT_FOLDER_ID = 'folder%3Aroot'
84+ BACKUP_DOCUMENT_TYPE = 'application/binary'
85+
86+ def __init__(self, parsed_url):
87+ duplicity.backend.Backend.__init__(self, parsed_url)
88+
89+ # Import Google Data APIs libraries.
90+ try:
91+ global atom
92+ global gdata
93+ import atom.data
94+ import gdata.client
95+ import gdata.docs.client
96+ import gdata.docs.data
97+ except ImportError:
98+ raise BackendException('Google Docs backend requires Google Data APIs Python '
99+ 'Client Library (see http://code.google.com/p/gdata-python-client/).')
100+
101+ # Setup client instance.
102+ self.client = gdata.docs.client.DocsClient(source='duplicity $version')
103+ self.client.ssl = True
104+ self.client.http_client.debug = False
105+ self.__authorize(parsed_url.username + '@' + parsed_url.hostname, self.get_password())
106+
107+ # Fetch destination folder entry (and crete hierarchy if required).
108+ folder_names = string.split(parsed_url.path[1:], '/')
109+ parent_folder = None
110+ parent_folder_id = GDocsBackend.ROOT_FOLDER_ID
111+ for folder_name in folder_names:
112+ entries = self.__fetch_entries(parent_folder_id, 'folder', folder_name)
113+ if entries is not None:
114+ if len(entries) == 1:
115+ parent_folder = entries[0]
116+ elif len(entries) == 0:
117+ parent_folder = self.client.create(gdata.docs.data.FOLDER_LABEL, folder_name, parent_folder)
118+ else:
119+ parent_folder = None
120+ if parent_folder:
121+ parent_folder_id = parent_folder.resource_id.text
122+ else:
123+ raise BackendException("Error while creating destination folder '%s'." % folder_name)
124+ else:
125+ raise BackendException("Error while fetching destination folder '%s'." % folder_name)
126+ self.folder = parent_folder
127+
128+ @retry
129+ def put(self, source_path, remote_filename=None, raise_errors = False):
130+ """Transfer source_path to remote_filename"""
131+ # Default remote file name.
132+ if not remote_filename:
133+ remote_filename = source_path.get_filename()
134+
135+ # Upload!
136+ try:
137+ # If remote file already exists in destination folder, remove it.
138+ entries = self.__fetch_entries(self.folder.resource_id.text,
139+ GDocsBackend.BACKUP_DOCUMENT_TYPE,
140+ remote_filename)
141+ for entry in entries:
142+ self.client.delete(entry.get_edit_link().href + '?delete=true', force=True)
143+
144+ # Set uploader instance. Note that resumable uploads are required in order to
145+ # enable uploads for all file types.
146+ # (see http://googleappsdeveloper.blogspot.com/2011/05/upload-all-file-types-to-any-google.html)
147+ file = source_path.open()
148+ uploader = gdata.client.ResumableUploader(
149+ self.client, file, GDocsBackend.BACKUP_DOCUMENT_TYPE, os.path.getsize(file.name),
150+ chunk_size=gdata.client.ResumableUploader.DEFAULT_CHUNK_SIZE,
151+ desired_class=gdata.docs.data.DocsEntry)
152+ if uploader:
153+ # Chunked upload.
154+ entry = gdata.docs.data.DocsEntry(title = atom.data.Title(text = remote_filename))
155+ uri = '/feeds/upload/create-session/default/private/full?convert=false'
156+ entry = uploader.UploadFile(uri, entry = entry)
157+ if entry:
158+ # Move to destination folder.
159+ # TODO: any ideas on how to avoid this step?
160+ if self.client.Move(entry, self.folder):
161+ assert not file.close()
162+ return
163+ else:
164+ self.__handle_error("Failed to move uploaded file '%s' to destination remote folder '%s'"
165+ % (source_path.get_filename(), self.folder.title.text), raise_errors)
166+ else:
167+ self.__handle_error("Failed to upload file '%s' to remote folder '%s'"
168+ % (source_path.get_filename(), self.folder.title.text), raise_errors)
169+ else:
170+ self.__handle_error("Failed to initialize upload of file '%s' to remote folder '%s'"
171+ % (source_path.get_filename(), self.folder.title.text), raise_errors)
172+ assert not file.close()
173+ except Exception, e:
174+ self.__handle_error("Failed to upload file '%s' to remote folder '%s': %s"
175+ % (source_path.get_filename(), self.folder.title.text, str(e)), raise_errors)
176+
177+ @retry
178+ def get(self, remote_filename, local_path, raise_errors = False):
179+ """Get remote filename, saving it to local_path"""
180+ try:
181+ entries = self.__fetch_entries(self.folder.resource_id.text,
182+ GDocsBackend.BACKUP_DOCUMENT_TYPE,
183+ remote_filename)
184+ if len(entries) == 1:
185+ entry = entries[0]
186+ self.client.Download(entry, local_path.name)
187+ local_path.setdata()
188+ return
189+ else:
190+ self.__handle_error("Failed to find file '%s' in remote folder '%s'"
191+ % (remote_filename, self.folder.title.text), raise_errors)
192+ except Exception, e:
193+ self.__handle_error("Failed to download file '%s' in remote folder '%s': %s"
194+ % (remote_filename, self.folder.title.text, str(e)), raise_errors)
195+
196+ @retry
197+ def list(self, raise_errors = False):
198+ """List files in folder"""
199+ try:
200+ entries = self.__fetch_entries(self.folder.resource_id.text,
201+ GDocsBackend.BACKUP_DOCUMENT_TYPE)
202+ return [entry.title.text for entry in entries]
203+ except Exception, e:
204+ self.__handle_error("Failed to fetch list of files in remote folder '%s': %s"
205+ % (self.folder.title.text, str(e)), raise_errors)
206+
207+ @retry
208+ def delete(self, filename_list, raise_errors = False):
209+ """Delete files in filename_list"""
210+ for filename in filename_list:
211+ try:
212+ entries = self.__fetch_entries(self.folder.resource_id.text,
213+ GDocsBackend.BACKUP_DOCUMENT_TYPE,
214+ filename)
215+ if len(entries) > 0:
216+ success = True
217+ for entry in entries:
218+ if not self.client.delete(entry.get_edit_link().href + '?delete=true', force = True):
219+ success = False
220+ if not success:
221+ self.__handle_error("Failed to remove file '%s' in remote folder '%s'"
222+ % (filename, self.folder.title.text), raise_errors)
223+ else:
224+ log.Warn("Failed to fetch file '%s' in remote folder '%s'"
225+ % (filename, self.folder.title.text))
226+ except Exception, e:
227+ self.__handle_error("Failed to remove file '%s' in remote folder '%s': %s"
228+ % (filename, self.folder.title.text, str(e)), raise_errors)
229+
230+ def __handle_error(self, message, raise_errors = True):
231+ if raise_errors:
232+ raise BackendException(message)
233+ else:
234+ log.FatalError(message, log.ErrorCode.backend_error)
235+
236+ def __authorize(self, email, password, captcha_token = None, captcha_response = None):
237+ try:
238+ self.client.client_login(email,
239+ password,
240+ source = 'duplicity $version',
241+ service = 'writely',
242+ captcha_token = captcha_token,
243+ captcha_response = captcha_response)
244+ except gdata.client.CaptchaChallenge, challenge:
245+ print('A captcha challenge in required. Please visit ' + challenge.captcha_url)
246+ answer = None
247+ while not answer:
248+ answer = raw_input('Answer to the challenge? ')
249+ self.__authorize(email, password, challenge.captcha_token, answer)
250+ except gdata.client.BadAuthentication:
251+ self.__handle_error('Invalid user credentials given. Be aware that accounts '
252+ 'that use 2-step verification require creating an application specific '
253+ 'access code for using this Duplicity backend. Follow the instrucction in '
254+ 'http://www.google.com/support/accounts/bin/static.py?page=guide.cs&guide=1056283&topic=1056286 '
255+ 'and create your application-specific password to run duplicity backups.')
256+ except Exception, e:
257+ self.__handle_error('Error while authenticating client: %s.' % str(e))
258+
259+ def __fetch_entries(self, folder_id, type, title = None):
260+ # Build URI.
261+ uri = '/feeds/default/private/full/%s/contents' % folder_id
262+ if type == 'folder':
263+ uri += '/-/folder?showfolders=true'
264+ elif type == GDocsBackend.BACKUP_DOCUMENT_TYPE:
265+ uri += '?showfolders=false'
266+ else:
267+ uri += '?showfolders=true'
268+ if title:
269+ uri += '&title=' + urllib.quote(title) + '&title-exact=true'
270+
271+ try:
272+ # Fetch entries
273+ entries = self.client.get_everything(uri = uri)
274+
275+ # When filtering by entry title, API is returning (don't know why) documents in other
276+ # folders (apart from folder_id) matching the title, so some extra filtering is required.
277+ if title:
278+ result = []
279+ for entry in entries:
280+ if (not type) or (entry.get_document_type() == type):
281+ if folder_id != GDocsBackend.ROOT_FOLDER_ID:
282+ for link in entry.in_folders():
283+ folder_entry = self.client.get_entry(link.href, None, None,
284+ desired_class=gdata.docs.data.DocsEntry)
285+ if folder_entry and (folder_entry.resource_id.text == folder_id):
286+ result.append(entry)
287+ elif len(entry.in_folders()) == 0:
288+ result.append(entry)
289+ else:
290+ result = entries
291+
292+ # Done!
293+ return result
294+ except Exception, e:
295+ self.__handle_error('Error while fetching remote entries: %s.' % str(e))
296+
297+duplicity.backend.register_backend('gdocs', GDocsBackend)
298
299=== modified file 'duplicity/commandline.py'
300--- duplicity/commandline.py 2011-07-16 18:37:47 +0000
301+++ duplicity/commandline.py 2011-08-03 20:04:34 +0000
302@@ -750,6 +750,7 @@
303 tahoe://%(alias)s/%(directory)s
304 webdav://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
305 webdavs://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
306+ gdocs://%(user)s[:%(password)s]@%(other_host)s/%(some_dir)s
307
308 """ % dict
309
310
311=== modified file 'po/POTFILES.in'
312--- po/POTFILES.in 2009-08-12 19:05:52 +0000
313+++ po/POTFILES.in 2011-08-03 20:04:34 +0000
314@@ -43,3 +43,4 @@
315 duplicity/backends/sshbackend.py
316 duplicity/backends/tahoebackend.py
317 duplicity/backends/webdavbackend.py
318+duplicity/backends/gdocsbackend.py
319
320=== modified file 'testing/config.py.tmpl'
321--- testing/config.py.tmpl 2011-06-17 18:22:28 +0000
322+++ testing/config.py.tmpl 2011-08-03 20:04:34 +0000
323@@ -83,6 +83,8 @@
324 webdavs_url = None
325 webdavs_password = None
326
327+gdocs_url = None
328+gdocs_password = None
329
330 def setup():
331 """ setup for unit tests """

Subscribers

People subscribed via source and target branches

to all changes: