Merge lp:~mterry/duplicity/tarfile into lp:duplicity/0.6

Proposed by Michael Terry
Status: Merged
Merged at revision: 783
Proposed branch: lp:~mterry/duplicity/tarfile
Merge into: lp:duplicity/0.6
Diff against target: 4277 lines (+2257/-1404)
13 files modified
duplicity/diffdir.py (+10/-12)
duplicity/dup_temp.py (+12/-0)
duplicity/gpg.py (+14/-0)
duplicity/patchdir.py (+10/-9)
duplicity/path.py (+8/-8)
duplicity/tarfile.py (+2154/-1271)
duplicity/util.py (+27/-0)
rdiffdir (+0/-1)
tarfile-CHANGES (+3/-0)
tarfile-LICENSE (+1/-1)
testing/diffdirtest.py (+4/-2)
testing/patchdirtest.py (+10/-17)
testing/test_tarfile.py (+4/-83)
To merge this branch: bzr merge lp:~mterry/duplicity/tarfile
Reviewer Review Type Date Requested Status
duplicity-team Pending
Review via email: mp+72422@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Michael Terry (mterry) wrote :

Following conversations on the mailing list, I've retooled this branch: instead of using the system tarfile.py, it now just updates our internal copy to Python 2.7's version (with changes for 2.4 compatibility).

Once we drop 2.4 support, it will be a simple change to drop our internal version and use the system version again.

I've run the test suite against this branch (under python 2.4 too), but of course it could use more testing.

Revision history for this message
edso (ed.so) wrote :

Does this solve
https://bugs.launchpad.net/duplicity/+bug/690549
?

Or is the patch there invalid, because tarfile.py does not support such big values?

ede

On 23.08.2011 20:36, Michael Terry wrote:
> Following conversations on the mailing list, I've retooled this branch: instead of using the system tarfile.py, it now just updates our internal copy to Python 2.7's version (with changes for 2.4 compatibility).
>
> Once we drop 2.4 support, it will be a simple change to drop our internal version and use the system version again.
>
> I've run the test suite against this branch (under python 2.4 too), but of course it could use more testing.

Revision history for this message
Michael Terry (mterry) wrote :

The patch there looks invalid, because it looks like it just assigns more bits in the tarfile for the gid/uid, which I don't think is part of the standard. Though maybe I'm misinterpreting the patch.

However, I do think this branch would solve that bug, as newer versions of tarfile.py seem to have explicit support for negative numbers (see the itn() function in this branch's tarfile.py).

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'duplicity/diffdir.py'
2--- duplicity/diffdir.py 2011-06-13 15:40:19 +0000
3+++ duplicity/diffdir.py 2011-08-23 18:28:26 +0000
4@@ -29,6 +29,7 @@
5
6 import cStringIO, types
7 from duplicity import statistics
8+from duplicity import util
9 from duplicity.path import * #@UnusedWildImport
10 from duplicity.lazy import * #@UnusedWildImport
11
12@@ -181,7 +182,7 @@
13 """
14 collated = collate2iters(new_iter, sig_iter)
15 if sig_fileobj:
16- sigTarFile = tarfile.TarFile("arbitrary", "w", sig_fileobj)
17+ sigTarFile = util.make_tarfile("w", sig_fileobj)
18 else:
19 sigTarFile = None
20 for new_path, sig_path in collated:
21@@ -224,16 +225,17 @@
22 """
23 Convert signature tar file object open for reading into path iter
24 """
25- tf = tarfile.TarFile("Arbitrary Name", "r", sigtarobj)
26- tf.debug = 2
27+ tf = util.make_tarfile("r", sigtarobj)
28+ tf.debug = 1
29 for tarinfo in tf:
30+ tiname = util.get_tarinfo_name(tarinfo)
31 for prefix in ["signature/", "snapshot/", "deleted/"]:
32- if tarinfo.name.startswith(prefix):
33- # strip prefix and from name and set it to difftype
34- name, difftype = tarinfo.name[len(prefix):], prefix[:-1]
35+ if tiname.startswith(prefix):
36+ # strip prefix and '/' from name and set it to difftype
37+ name, difftype = tiname[len(prefix):], prefix[:-1]
38 break
39 else:
40- raise DiffDirException("Bad tarinfo name %s" % (tarinfo.name,))
41+ raise DiffDirException("Bad tarinfo name %s" % (tiname,))
42
43 index = tuple(name.split("/"))
44 if not index[-1]:
45@@ -464,16 +466,12 @@
46 self.remember_value = None # holds index of next block
47 self.remember_block = None # holds block of next block
48
49- # We need to instantiate a dummy TarFile just to get access to
50- # some of the functions like _get_full_headers.
51- self.tf = tarfile.TarFromIterator(None)
52-
53 def tarinfo2tarblock(self, index, tarinfo, file_data = ""):
54 """
55 Make tarblock out of tarinfo and file data
56 """
57 tarinfo.size = len(file_data)
58- headers = self.tf._get_full_headers(tarinfo)
59+ headers = tarinfo.tobuf()
60 blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE) #@UnusedVariable
61 if remainder > 0:
62 filler_data = "\0" * (tarfile.BLOCKSIZE - remainder)
63
64=== modified file 'duplicity/dup_temp.py'
65--- duplicity/dup_temp.py 2011-08-18 19:17:55 +0000
66+++ duplicity/dup_temp.py 2011-08-23 18:28:26 +0000
67@@ -207,6 +207,18 @@
68 """
69 return self.fileobj.read(length)
70
71+ def tell(self):
72+ """
73+ Returns current location of fileobj
74+ """
75+ return self.fileobj.tell()
76+
77+ def seek(self, offset):
78+ """
79+ Seeks to a location of fileobj
80+ """
81+ return self.fileobj.seek(offset)
82+
83 def close(self):
84 """
85 Close fileobj, running hooks right afterwards
86
87=== modified file 'duplicity/gpg.py'
88--- duplicity/gpg.py 2011-07-16 18:37:47 +0000
89+++ duplicity/gpg.py 2011-08-23 18:28:26 +0000
90@@ -96,6 +96,7 @@
91 self.logger_fp = tempfile.TemporaryFile()
92 self.stderr_fp = tempfile.TemporaryFile()
93 self.name = encrypt_path
94+ self.byte_count = 0
95
96 # Start GPG process - copied from GnuPGInterface docstring.
97 gnupg = GnuPGInterface.GnuPG()
98@@ -157,6 +158,8 @@
99 def read(self, length = -1):
100 try:
101 res = self.gpg_output.read(length)
102+ if res is not None:
103+ self.byte_count += len(res)
104 except Exception:
105 self.gpg_failed()
106 return res
107@@ -164,10 +167,21 @@
108 def write(self, buf):
109 try:
110 res = self.gpg_input.write(buf)
111+ if res is not None:
112+ self.byte_count += len(res)
113 except Exception:
114 self.gpg_failed()
115 return res
116
117+ def tell(self):
118+ return self.byte_count
119+
120+ def seek(self, offset):
121+ assert not self.encrypt
122+ assert offset >= self.byte_count, "%d < %d" % (offset, self.byte_count)
123+ if offset > self.byte_count:
124+ self.read(offset - self.byte_count)
125+
126 def gpg_failed(self):
127 msg = "GPG Failed, see log below:\n"
128 msg += "===== Begin GnuPG log =====\n"
129
130=== modified file 'duplicity/patchdir.py'
131--- duplicity/patchdir.py 2011-03-06 12:30:14 +0000
132+++ duplicity/patchdir.py 2011-08-23 18:28:26 +0000
133@@ -141,8 +141,9 @@
134 """Return (index, difftype, multivol) pair from tarinfo object"""
135 for prefix in ["snapshot/", "diff/", "deleted/",
136 "multivol_diff/", "multivol_snapshot/"]:
137- if tarinfo.name.startswith( prefix ):
138- name = tarinfo.name[len( prefix ):] # strip prefix
139+ tiname = util.get_tarinfo_name( tarinfo )
140+ if tiname.startswith( prefix ):
141+ name = tiname[len( prefix ):] # strip prefix
142 if prefix.startswith( "multivol" ):
143 if prefix == "multivol_diff/":
144 difftype = "diff"
145@@ -150,28 +151,28 @@
146 difftype = "snapshot"
147 multivol = 1
148 name, num_subs = \
149- re.subn( "(?s)^multivol_(diff|snapshot)/(.*)/[0-9]+$",
150- "\\2", tarinfo.name )
151+ re.subn( "(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
152+ "\\2", tiname )
153 if num_subs != 1:
154 raise PatchDirException( "Unrecognized diff entry %s" %
155- ( tarinfo.name, ) )
156+ ( tiname, ) )
157 else:
158 difftype = prefix[:-1] # strip trailing /
159- name = tarinfo.name[len( prefix ):]
160+ name = tiname[len( prefix ):]
161 if name.endswith( "/" ):
162 name = name[:-1] # strip trailing /'s
163 multivol = 0
164 break
165 else:
166 raise PatchDirException( "Unrecognized diff entry %s" %
167- ( tarinfo.name, ) )
168+ ( tiname, ) )
169 if name == "." or name == "":
170 index = ()
171 else:
172 index = tuple( name.split( "/" ) )
173 if '..' in index:
174 raise PatchDirException( "Tar entry %s contains '..'. Security "
175- "violation" % ( tarinfo.name, ) )
176+ "violation" % ( tiname, ) )
177 return ( index, difftype, multivol )
178
179
180@@ -320,7 +321,7 @@
181 if self.current_fp:
182 assert not self.current_fp.close()
183 self.current_fp = self.fileobj_iter.next()
184- self.tarfile = tarfile.TarFile( "arbitrary", "r", self.current_fp )
185+ self.tarfile = util.make_tarfile("r", self.current_fp)
186 self.tar_iter = iter( self.tarfile )
187
188 def next( self ):
189
190=== modified file 'duplicity/path.py'
191--- duplicity/path.py 2011-08-18 20:08:56 +0000
192+++ duplicity/path.py 2011-08-23 18:28:26 +0000
193@@ -26,12 +26,12 @@
194
195 """
196
197-import stat, errno, socket, time, re, gzip
198+import stat, errno, socket, time, re, gzip, pwd, grp
199
200+from duplicity import tarfile
201 from duplicity import file_naming
202 from duplicity import globals
203 from duplicity import gpg
204-from duplicity import tarfile
205 from duplicity import util
206 from duplicity import librsync
207 from duplicity import log #@UnusedImport
208@@ -206,13 +206,13 @@
209 try:
210 if globals.numeric_owner:
211 raise KeyError
212- self.stat.st_uid = tarfile.uname2uid(tarinfo.uname)
213+ self.stat.st_uid = pwd.getpwnam(tarinfo.uname)[2]
214 except KeyError:
215 self.stat.st_uid = tarinfo.uid
216 try:
217 if globals.numeric_owner:
218 raise KeyError
219- self.stat.st_gid = tarfile.gname2gid(tarinfo.gname)
220+ self.stat.st_gid = grp.getgrnam(tarinfo.gname)[2]
221 except KeyError:
222 self.stat.st_gid = tarinfo.gid
223
224@@ -284,13 +284,13 @@
225 ti.mtime = int(self.stat.st_mtime)
226
227 try:
228- ti.uname = tarfile.uid2uname(ti.uid)
229+ ti.uname = pwd.getpwuid(ti.uid)[0]
230 except KeyError:
231- pass
232+ ti.uname = ''
233 try:
234- ti.gname = tarfile.gid2gname(ti.gid)
235+ ti.gname = grp.getgrgid(ti.gid)[0]
236 except KeyError:
237- pass
238+ ti.gname = ''
239
240 if ti.type in (tarfile.CHRTYPE, tarfile.BLKTYPE):
241 if hasattr(os, "major") and hasattr(os, "minor"):
242
243=== modified file 'duplicity/tarfile.py'
244--- duplicity/tarfile.py 2011-06-23 15:47:44 +0000
245+++ duplicity/tarfile.py 2011-08-23 18:28:26 +0000
246@@ -1,17 +1,9 @@
247-#!/usr/bin/env python
248-# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
249+#! /usr/bin/python2.7
250+# -*- coding: iso-8859-1 -*-
251 #-------------------------------------------------------------------
252 # tarfile.py
253-#
254-# Module for reading and writing .tar and tar.gz files.
255-#
256-# Needs at least Python version 2.2.
257-#
258-# Please consult the html documentation in this distribution
259-# for further details on how to use tarfile.
260-#
261 #-------------------------------------------------------------------
262-# Copyright (C) 2002 Lars Gustabel <lars@gustaebel.de>
263+# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
264 # All rights reserved.
265 #
266 # Permission is hereby granted, free of charge, to any person
267@@ -38,98 +30,105 @@
268 """Read from and write to tar format archives.
269 """
270
271-__version__ = "$Revision: 1.8 $"
272-# $Source: /sources/duplicity/duplicity/duplicity/tarfile.py,v $
273+__version__ = "$Revision: 85213 $"
274+# $Source$
275
276-version = "0.4.9"
277-__author__ = "Lars Gustabel (lars@gustaebel.de)"
278-__date__ = "$Date: 2008/11/16 18:48:15 $"
279-__cvsid__ = "$Id: tarfile.py,v 1.8 2008/11/16 18:48:15 loafman Exp $"
280-__credits__ = "Gustavo Niemeyer for his support, " \
281- "Detlef Lannert for some early contributions"
282+version = "0.9.0"
283+__author__ = "Lars Gustäbel (lars@gustaebel.de)"
284+__date__ = "$Date: 2010-10-04 10:37:53 -0500 (Mon, 04 Oct 2010) $"
285+__cvsid__ = "$Id: tarfile.py 85213 2010-10-04 15:37:53Z lars.gustaebel $"
286+__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
287
288 #---------
289 # Imports
290 #---------
291 import sys
292 import os
293-import __builtin__
294 import shutil
295 import stat
296 import errno
297 import time
298+import struct
299+import copy
300+import re
301+import operator
302
303 try:
304 import grp, pwd
305 except ImportError:
306 grp = pwd = None
307-# These are used later to cache user and group names and ids
308-gname_dict = uname_dict = uid_dict = gid_dict = None
309-
310-# We won't need this anymore in Python 2.3
311-#
312-# We import the _tarfile extension, that contains
313-# some useful functions to handle devices and symlinks.
314-# We inject them into os module, as if we were under 2.3.
315-#
316-try:
317- import _tarfile
318- if _tarfile.mknod is None:
319- _tarfile = None
320-except ImportError:
321- _tarfile = None
322-if _tarfile and not hasattr(os, "mknod"):
323- os.mknod = _tarfile.mknod
324-if _tarfile and not hasattr(os, "major"):
325- os.major = _tarfile.major
326-if _tarfile and not hasattr(os, "minor"):
327- os.minor = _tarfile.minor
328-if _tarfile and not hasattr(os, "makedev"):
329- os.makedev = _tarfile.makedev
330-if _tarfile and not hasattr(os, "lchown"):
331- os.lchown = _tarfile.lchown
332-
333-# XXX remove for release (2.3)
334-if sys.version_info[:2] < (2,3):
335- True = 1
336- False = 0
337-
338-#---------------------------------------------------------
339-# GNUtar constants
340-#---------------------------------------------------------
341-BLOCKSIZE = 512 # length of processing blocks
342+
343+# from tarfile import *
344+__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
345+
346+#---------------------------------------------------------
347+# tar constants
348+#---------------------------------------------------------
349+NUL = "\0" # the null character
350+BLOCKSIZE = 512 # length of processing blocks
351 RECORDSIZE = BLOCKSIZE * 20 # length of records
352-MAGIC = "ustar" # magic tar string
353-VERSION = "00" # version number
354-
355-LENGTH_NAME = 100 # maximal length of a filename
356-LENGTH_LINK = 100 # maximal length of a linkname
357-
358-REGTYPE = "0" # regular file
359+GNU_MAGIC = "ustar \0" # magic gnu tar string
360+POSIX_MAGIC = "ustar\x0000" # magic posix tar string
361+
362+LENGTH_NAME = 100 # maximum length of a filename
363+LENGTH_LINK = 100 # maximum length of a linkname
364+LENGTH_PREFIX = 155 # maximum length of the prefix field
365+
366+REGTYPE = "0" # regular file
367 AREGTYPE = "\0" # regular file
368-LNKTYPE = "1" # link (inside tarfile)
369-SYMTYPE = "2" # symbolic link
370-CHRTYPE = "3" # character special device
371-BLKTYPE = "4" # block special device
372-DIRTYPE = "5" # directory
373+LNKTYPE = "1" # link (inside tarfile)
374+SYMTYPE = "2" # symbolic link
375+CHRTYPE = "3" # character special device
376+BLKTYPE = "4" # block special device
377+DIRTYPE = "5" # directory
378 FIFOTYPE = "6" # fifo special device
379 CONTTYPE = "7" # contiguous file
380
381-GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
382-GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
383-GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
384+GNUTYPE_LONGNAME = "L" # GNU tar longname
385+GNUTYPE_LONGLINK = "K" # GNU tar longlink
386+GNUTYPE_SPARSE = "S" # GNU tar sparse file
387+
388+XHDTYPE = "x" # POSIX.1-2001 extended header
389+XGLTYPE = "g" # POSIX.1-2001 global header
390+SOLARIS_XHDTYPE = "X" # Solaris extended header
391+
392+USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
393+GNU_FORMAT = 1 # GNU tar format
394+PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
395+DEFAULT_FORMAT = GNU_FORMAT
396
397 #---------------------------------------------------------
398 # tarfile constants
399 #---------------------------------------------------------
400-SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
401- SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
402- CONTTYPE, GNUTYPE_LONGNAME,
403- GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
404- CHRTYPE, BLKTYPE)
405-
406-REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
407- CONTTYPE, GNUTYPE_SPARSE) # represent regular files
408+# File types that tarfile supports:
409+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
410+ SYMTYPE, DIRTYPE, FIFOTYPE,
411+ CONTTYPE, CHRTYPE, BLKTYPE,
412+ GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
413+ GNUTYPE_SPARSE)
414+
415+# File types that will be treated as a regular file.
416+REGULAR_TYPES = (REGTYPE, AREGTYPE,
417+ CONTTYPE, GNUTYPE_SPARSE)
418+
419+# File types that are part of the GNU tar format.
420+GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
421+ GNUTYPE_SPARSE)
422+
423+# Fields from a pax header that override a TarInfo attribute.
424+PAX_FIELDS = ("path", "linkpath", "size", "mtime",
425+ "uid", "gid", "uname", "gname")
426+
427+# Fields in a pax header that are numbers, all other fields
428+# are treated as strings.
429+PAX_NUMBER_FIELDS = {
430+ "atime": float,
431+ "ctime": float,
432+ "mtime": float,
433+ "uid": int,
434+ "gid": int,
435+ "size": int
436+}
437
438 #---------------------------------------------------------
439 # Bits used in the mode field, values in octal.
440@@ -145,34 +144,117 @@
441 TSGID = 02000 # set GID on execution
442 TSVTX = 01000 # reserved
443
444-TUREAD = 00400 # read by owner
445-TUWRITE = 00200 # write by owner
446-TUEXEC = 00100 # execute/search by owner
447-TGREAD = 00040 # read by group
448-TGWRITE = 00020 # write by group
449-TGEXEC = 00010 # execute/search by group
450-TOREAD = 00004 # read by other
451-TOWRITE = 00002 # write by other
452-TOEXEC = 00001 # execute/search by other
453+TUREAD = 0400 # read by owner
454+TUWRITE = 0200 # write by owner
455+TUEXEC = 0100 # execute/search by owner
456+TGREAD = 0040 # read by group
457+TGWRITE = 0020 # write by group
458+TGEXEC = 0010 # execute/search by group
459+TOREAD = 0004 # read by other
460+TOWRITE = 0002 # write by other
461+TOEXEC = 0001 # execute/search by other
462+
463+#---------------------------------------------------------
464+# initialization
465+#---------------------------------------------------------
466+ENCODING = sys.getfilesystemencoding()
467+if ENCODING is None:
468+ ENCODING = sys.getdefaultencoding()
469
470 #---------------------------------------------------------
471 # Some useful functions
472 #---------------------------------------------------------
473+
474+def stn(s, length):
475+ """Convert a python string to a null-terminated string buffer.
476+ """
477+ return s[:length] + (length - len(s)) * NUL
478+
479 def nts(s):
480- """Convert a null-terminated string buffer to a python string.
481- """
482- return s.split("\0", 1)[0]
483-
484-def calc_chksum(buf):
485- """Calculate the checksum for a member's header. It's a simple addition
486- of all bytes, treating the chksum field as if filled with spaces.
487- buf is a 512 byte long string buffer which holds the header.
488- """
489- chk = 256 # chksum field is treated as blanks,
490- # so the initial value is 8 * ord(" ")
491- for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
492- for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
493- return chk
494+ """Convert a null-terminated string field to a python string.
495+ """
496+ # Use the string up to the first null char.
497+ p = s.find("\0")
498+ if p == -1:
499+ return s
500+ return s[:p]
501+
502+def nti(s):
503+ """Convert a number field to a python number.
504+ """
505+ # There are two possible encodings for a number field, see
506+ # itn() below.
507+ if s[0] != chr(0200):
508+ try:
509+ n = int(nts(s) or "0", 8)
510+ except ValueError:
511+ raise InvalidHeaderError("invalid header")
512+ else:
513+ n = 0L
514+ for i in xrange(len(s) - 1):
515+ n <<= 8
516+ n += ord(s[i + 1])
517+ return n
518+
519+def itn(n, digits=8, format=DEFAULT_FORMAT):
520+ """Convert a python number to a number field.
521+ """
522+ # POSIX 1003.1-1988 requires numbers to be encoded as a string of
523+ # octal digits followed by a null-byte, this allows values up to
524+ # (8**(digits-1))-1. GNU tar allows storing numbers greater than
525+ # that if necessary. A leading 0200 byte indicates this particular
526+ # encoding, the following digits-1 bytes are a big-endian
527+ # representation. This allows values up to (256**(digits-1))-1.
528+ if 0 <= n < 8 ** (digits - 1):
529+ s = "%0*o" % (digits - 1, n) + NUL
530+ else:
531+ if format != GNU_FORMAT or n >= 256 ** (digits - 1):
532+ raise ValueError("overflow in number field")
533+
534+ if n < 0:
535+ # XXX We mimic GNU tar's behaviour with negative numbers,
536+ # this could raise OverflowError.
537+ n = struct.unpack("L", struct.pack("l", n))[0]
538+
539+ s = ""
540+ for i in xrange(digits - 1):
541+ s = chr(n & 0377) + s
542+ n >>= 8
543+ s = chr(0200) + s
544+ return s
545+
546+def uts(s, encoding, errors):
547+ """Convert a unicode object to a string.
548+ """
549+ if errors == "utf-8":
550+ # An extra error handler similar to the -o invalid=UTF-8 option
551+ # in POSIX.1-2001. Replace untranslatable characters with their
552+ # UTF-8 representation.
553+ try:
554+ return s.encode(encoding, "strict")
555+ except UnicodeEncodeError:
556+ x = []
557+ for c in s:
558+ try:
559+ x.append(c.encode(encoding, "strict"))
560+ except UnicodeEncodeError:
561+ x.append(c.encode("utf8"))
562+ return "".join(x)
563+ else:
564+ return s.encode(encoding, errors)
565+
566+def calc_chksums(buf):
567+ """Calculate the checksum for a member's header by summing up all
568+ characters except for the chksum field which is treated as if
569+ it was filled with spaces. According to the GNU tar sources,
570+ some tars (Sun and NeXT) calculate chksum with signed char,
571+ which will be different if there are chars in the buffer with
572+ the high bit set. So we calculate two checksums, unsigned and
573+ signed.
574+ """
575+ unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
576+ signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
577+ return unsigned_chksum, signed_chksum
578
579 def copyfileobj(src, dst, length=None):
580 """Copy length bytes from fileobj src to fileobj dst.
581@@ -186,242 +268,1186 @@
582
583 BUFSIZE = 16 * 1024
584 blocks, remainder = divmod(length, BUFSIZE)
585- for b in range(blocks): #@UnusedVariable
586+ for b in xrange(blocks):
587 buf = src.read(BUFSIZE)
588 if len(buf) < BUFSIZE:
589- raise IOError, "end of file reached"
590+ raise IOError("end of file reached")
591 dst.write(buf)
592
593 if remainder != 0:
594 buf = src.read(remainder)
595 if len(buf) < remainder:
596- raise IOError, "end of file reached"
597+ raise IOError("end of file reached")
598 dst.write(buf)
599 return
600
601 filemode_table = (
602- (S_IFLNK, "l",
603- S_IFREG, "-",
604- S_IFBLK, "b",
605- S_IFDIR, "d",
606- S_IFCHR, "c",
607- S_IFIFO, "p"),
608- (TUREAD, "r"),
609- (TUWRITE, "w"),
610- (TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
611- (TGREAD, "r"),
612- (TGWRITE, "w"),
613- (TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
614- (TOREAD, "r"),
615- (TOWRITE, "w"),
616- (TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
617+ ((S_IFLNK, "l"),
618+ (S_IFREG, "-"),
619+ (S_IFBLK, "b"),
620+ (S_IFDIR, "d"),
621+ (S_IFCHR, "c"),
622+ (S_IFIFO, "p")),
623+
624+ ((TUREAD, "r"),),
625+ ((TUWRITE, "w"),),
626+ ((TUEXEC|TSUID, "s"),
627+ (TSUID, "S"),
628+ (TUEXEC, "x")),
629+
630+ ((TGREAD, "r"),),
631+ ((TGWRITE, "w"),),
632+ ((TGEXEC|TSGID, "s"),
633+ (TSGID, "S"),
634+ (TGEXEC, "x")),
635+
636+ ((TOREAD, "r"),),
637+ ((TOWRITE, "w"),),
638+ ((TOEXEC|TSVTX, "t"),
639+ (TSVTX, "T"),
640+ (TOEXEC, "x"))
641+)
642
643 def filemode(mode):
644 """Convert a file's mode to a string of the form
645 -rwxrwxrwx.
646 Used by TarFile.list()
647 """
648- s = ""
649- for t in filemode_table:
650- while 1:
651- if mode & t[0] == t[0]:
652- s += t[1]
653- elif len(t) > 2:
654- t = t[2:]
655- continue
656- else:
657- s += "-"
658- break
659- return s
660-
661-if os.sep != "/":
662- normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
663-else:
664- normpath = os.path.normpath
665+ perm = []
666+ for table in filemode_table:
667+ for bit, char in table:
668+ if mode & bit == bit:
669+ perm.append(char)
670+ break
671+ else:
672+ perm.append("-")
673+ return "".join(perm)
674
675 class TarError(Exception):
676- """Internally used exception"""
677- pass
678-
679-#--------------------
680-# exported functions
681-#--------------------
682-def open(name, mode="r", fileobj=None):
683- """Open (uncompressed) tar archive name for reading, writing
684- or appending.
685- """
686- return TarFile(name, mode, fileobj)
687-
688-def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
689- """Open gzip compressed tar archive name for reading or writing.
690- Appending is not allowed.
691- """
692- if gzmode == "a":
693- raise ValueError, "Appending to gzipped archive is not allowed"
694- import gzip
695- pre, ext = os.path.splitext(gzname)
696- pre = os.path.basename(pre)
697- if ext == ".tgz":
698- ext = ".tar"
699- if ext == ".gz":
700- ext = ""
701- tarname = pre + ext
702- mode = gzmode
703- if "b" not in gzmode:
704- gzmode += "b"
705- if mode[0:1] == "w":
706- if not fileobj:
707- fileobj = __builtin__.file(gzname, gzmode)
708- t = TarFile(tarname, mode, gzip.GzipFile(tarname, gzmode,
709- compresslevel, fileobj))
710- else:
711- t = TarFile(tarname, mode, gzip.open(gzname, gzmode, compresslevel))
712- t._extfileobj = 0
713- return t
714-
715-def is_tarfile(name):
716- """Return True if name points to a tar archive that we
717- are able to handle, else return False.
718- """
719-
720- buftoinfo = TarFile.__dict__["_buftoinfo"]
721- try:
722- buf = __builtin__.open(name, "rb").read(BLOCKSIZE)
723- buftoinfo(None, buf)
724- return True
725- except (ValueError, ImportError):
726- pass
727- try:
728- import gzip
729- buf = gzip.open(name, "rb").read(BLOCKSIZE)
730- buftoinfo(None, buf)
731- return True
732- except (IOError, ValueError, ImportError):
733- pass
734- return False
735+ """Base exception."""
736+ pass
737+class ExtractError(TarError):
738+ """General exception for extract errors."""
739+ pass
740+class ReadError(TarError):
741+ """Exception for unreadble tar archives."""
742+ pass
743+class CompressionError(TarError):
744+ """Exception for unavailable compression methods."""
745+ pass
746+class StreamError(TarError):
747+ """Exception for unsupported operations on stream-like TarFiles."""
748+ pass
749+class HeaderError(TarError):
750+ """Base exception for header errors."""
751+ pass
752+class EmptyHeaderError(HeaderError):
753+ """Exception for empty headers."""
754+ pass
755+class TruncatedHeaderError(HeaderError):
756+ """Exception for truncated headers."""
757+ pass
758+class EOFHeaderError(HeaderError):
759+ """Exception for end of file headers."""
760+ pass
761+class InvalidHeaderError(HeaderError):
762+ """Exception for invalid headers."""
763+ pass
764+class SubsequentHeaderError(HeaderError):
765+ """Exception for missing and invalid extended headers."""
766+ pass
767+
768+#---------------------------
769+# internal stream interface
770+#---------------------------
771+class _LowLevelFile:
772+ """Low-level file object. Supports reading and writing.
773+ It is used instead of a regular file object for streaming
774+ access.
775+ """
776+
777+ def __init__(self, name, mode):
778+ mode = {
779+ "r": os.O_RDONLY,
780+ "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
781+ }[mode]
782+ if hasattr(os, "O_BINARY"):
783+ mode |= os.O_BINARY
784+ self.fd = os.open(name, mode, 0666)
785+
786+ def close(self):
787+ os.close(self.fd)
788+
789+ def read(self, size):
790+ return os.read(self.fd, size)
791+
792+ def write(self, s):
793+ os.write(self.fd, s)
794+
795+class _Stream:
796+ """Class that serves as an adapter between TarFile and
797+ a stream-like object. The stream-like object only
798+ needs to have a read() or write() method and is accessed
799+ blockwise. Use of gzip or bzip2 compression is possible.
800+ A stream-like object could be for example: sys.stdin,
801+ sys.stdout, a socket, a tape device etc.
802+
803+ _Stream is intended to be used only internally.
804+ """
805+
806+ def __init__(self, name, mode, comptype, fileobj, bufsize):
807+ """Construct a _Stream object.
808+ """
809+ self._extfileobj = True
810+ if fileobj is None:
811+ fileobj = _LowLevelFile(name, mode)
812+ self._extfileobj = False
813+
814+ if comptype == '*':
815+ # Enable transparent compression detection for the
816+ # stream interface
817+ fileobj = _StreamProxy(fileobj)
818+ comptype = fileobj.getcomptype()
819+
820+ self.name = name or ""
821+ self.mode = mode
822+ self.comptype = comptype
823+ self.fileobj = fileobj
824+ self.bufsize = bufsize
825+ self.buf = ""
826+ self.pos = 0L
827+ self.closed = False
828+
829+ if comptype == "gz":
830+ try:
831+ import zlib
832+ except ImportError:
833+ raise CompressionError("zlib module is not available")
834+ self.zlib = zlib
835+ self.crc = zlib.crc32("") & 0xffffffffL
836+ if mode == "r":
837+ self._init_read_gz()
838+ else:
839+ self._init_write_gz()
840+
841+ if comptype == "bz2":
842+ try:
843+ import bz2
844+ except ImportError:
845+ raise CompressionError("bz2 module is not available")
846+ if mode == "r":
847+ self.dbuf = ""
848+ self.cmp = bz2.BZ2Decompressor()
849+ else:
850+ self.cmp = bz2.BZ2Compressor()
851+
852+ def __del__(self):
853+ if hasattr(self, "closed") and not self.closed:
854+ self.close()
855+
856+ def _init_write_gz(self):
857+ """Initialize for writing with gzip compression.
858+ """
859+ self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
860+ -self.zlib.MAX_WBITS,
861+ self.zlib.DEF_MEM_LEVEL,
862+ 0)
863+ timestamp = struct.pack("<L", long(time.time()))
864+ self.__write("\037\213\010\010%s\002\377" % timestamp)
865+ if self.name.endswith(".gz"):
866+ self.name = self.name[:-3]
867+ self.__write(self.name + NUL)
868+
869+ def write(self, s):
870+ """Write string s to the stream.
871+ """
872+ if self.comptype == "gz":
873+ self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
874+ self.pos += len(s)
875+ if self.comptype != "tar":
876+ s = self.cmp.compress(s)
877+ self.__write(s)
878+
879+ def __write(self, s):
880+ """Write string s to the stream if a whole new block
881+ is ready to be written.
882+ """
883+ self.buf += s
884+ while len(self.buf) > self.bufsize:
885+ self.fileobj.write(self.buf[:self.bufsize])
886+ self.buf = self.buf[self.bufsize:]
887+
888+ def close(self):
889+ """Close the _Stream object. No operation should be
890+ done on it afterwards.
891+ """
892+ if self.closed:
893+ return
894+
895+ if self.mode == "w" and self.comptype != "tar":
896+ self.buf += self.cmp.flush()
897+
898+ if self.mode == "w" and self.buf:
899+ self.fileobj.write(self.buf)
900+ self.buf = ""
901+ if self.comptype == "gz":
902+ # The native zlib crc is an unsigned 32-bit integer, but
903+ # the Python wrapper implicitly casts that to a signed C
904+ # long. So, on a 32-bit box self.crc may "look negative",
905+ # while the same crc on a 64-bit box may "look positive".
906+ # To avoid irksome warnings from the `struct` module, force
907+ # it to look positive on all boxes.
908+ self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
909+ self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
910+
911+ if not self._extfileobj:
912+ self.fileobj.close()
913+
914+ self.closed = True
915+
def _init_read_gz(self):
    """Initialize for reading a gzip compressed fileobj.

       Creates the raw-deflate decompressor and consumes the gzip
       member header from the underlying stream so that the next
       raw read starts at the compressed data.  Raises ReadError if
       the magic number is wrong and CompressionError if the
       compression method is not deflate.
    """
    self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
    self.dbuf = ""

    # taken from gzip.GzipFile with some alterations
    if self.__read(2) != "\037\213":
        raise ReadError("not a gzip file")
    if self.__read(1) != "\010":
        raise CompressionError("unsupported compression method")

    flag = ord(self.__read(1))
    # Skip the rest of the fixed header: MTIME (4), XFL (1), OS (1).
    self.__read(6)

    if flag & 4:
        # FEXTRA: two-byte little-endian length followed by that many
        # bytes.  These are raw header bytes, so they must be skipped
        # with the raw reader -- self.read() would push them through
        # the decompressor and advance the logical stream position.
        xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
        self.__read(xlen)
    if flag & 8:
        # FNAME: NUL-terminated original file name.
        while True:
            s = self.__read(1)
            if not s or s == NUL:
                break
    if flag & 16:
        # FCOMMENT: NUL-terminated comment.
        while True:
            s = self.__read(1)
            if not s or s == NUL:
                break
    if flag & 2:
        # FHCRC: two-byte header checksum (not verified).
        self.__read(2)
946+
def tell(self):
    """Return the stream's current file pointer position
       (the running count kept in self.pos).
    """
    return self.pos
951+
def seek(self, pos=0):
    """Advance the stream's file pointer to pos and return the new
       position.  The stream can only move forward: the skipped
       bytes are read and discarded in bufsize-sized pieces.
       Seeking backwards raises StreamError.
    """
    distance = pos - self.pos
    if distance < 0:
        raise StreamError("seeking backwards is not allowed")

    blocks, remainder = divmod(distance, self.bufsize)
    while blocks > 0:
        self.read(self.bufsize)
        blocks -= 1
    self.read(remainder)
    return self.pos
964+
def read(self, size=None):
    """Return the next size bytes from the stream and advance the
       position by the number of bytes actually returned.  With
       size left at None, everything up to EOF is returned.
    """
    if size is not None:
        data = self._read(size)
    else:
        # Drain the stream one buffer at a time until it runs dry.
        chunks = []
        while True:
            chunk = self._read(self.bufsize)
            if not chunk:
                break
            chunks.append(chunk)
        data = "".join(chunks)
    self.pos += len(data)
    return data
982+
def _read(self, size):
    """Return up to size bytes of payload from the stream.

       For plain tar streams this is a raw read.  Otherwise raw
       blocks are pulled and decompressed until at least size bytes
       are available (or the input ends); surplus decompressed
       bytes are kept in self.dbuf for the next call.  An IOError
       from the decompressor is reported as ReadError.
    """
    if self.comptype == "tar":
        return self.__read(size)

    pieces = [self.dbuf]
    available = len(self.dbuf)
    while available < size:
        raw = self.__read(self.bufsize)
        if not raw:
            break
        try:
            plain = self.cmp.decompress(raw)
        except IOError:
            raise ReadError("invalid compressed data")
        pieces.append(plain)
        available += len(plain)
    joined = "".join(pieces)
    self.dbuf = joined[size:]
    return joined[:size]
1004+
def __read(self, size):
    """Return up to size raw bytes from the underlying file object.

       Bytes left over from an earlier call (self.buf) are served
       first; further bufsize-sized blocks are read only while fewer
       than size bytes are at hand.  Anything read beyond size is
       stored back in self.buf.
    """
    pieces = [self.buf]
    available = len(self.buf)
    while available < size:
        block = self.fileobj.read(self.bufsize)
        if not block:
            break
        pieces.append(block)
        available += len(block)
    joined = "".join(pieces)
    self.buf = joined[size:]
    return joined[:size]
1020+# class _Stream
1021+
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first BLOCKSIZE bytes of the wrapped file object are read
       eagerly so that getcomptype() can inspect them; the first
       read() call hands that block back to the consumer.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the pre-read block (size is ignored) and route all
           later read() calls straight to the wrapped file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           found at the start of the pre-read block.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit '1'-'9' giving
        # the block size, and then the block magic "1AY&SY".  Testing
        # for the literal "BZh91" would only recognise level-9 files.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class _StreamProxy
1045+
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched per read from the file object.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)create the bz2 codec and reset the logical position.
           In read mode the file object is rewound to its start.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size decompressed bytes and advance the
           position accordingly.
        """
        chunks = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            chunks.append(data)
            available += len(data)
        joined = "".join(chunks)

        result = joined[:size]
        self.buf = joined[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to decompressed offset pos.  Going backwards restarts
           decompression from the beginning of the file.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current (uncompressed) position."""
        return self.pos

    def write(self, data):
        """Compress data and pass the output to the file object."""
        self.pos += len(data)
        compressed = self.bz2obj.compress(data)
        self.fileobj.write(compressed)

    def close(self):
        """In write mode, flush the compressor into the file object.
           The file object itself is not closed here.
        """
        if self.mode == "w":
            tail = self.bz2obj.flush()
            self.fileobj.write(tail)
# class _BZ2Proxy
1107+
1108+#------------------------
1109+# Extraction file object
1110+#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       fileobj is the enclosing file, offset the position where the
       embedded data starts and size its logical length.  sparse, if
       given, is the section map used to rebuild a sparse file.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.

           At most size bytes are returned, and never more than what
           is left of the embedded file.  A size of None or a
           negative size means "everything up to the end of the
           embedded file", as for ordinary file objects.
        """
        # Clamp to zero so that a position beyond the end can never
        # turn into a negative count, which the underlying file would
        # interpret as "read everything" and leak foreign data.
        remaining = max(self.size - self.position, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.

           Returns at most size bytes, never crossing the end of the
           section that contains the current position: stored bytes
           for a data section, NUL bytes for a hole.  Returns an empty
           string if no section covers the position.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Translate the logical position into the offset of the
            # bytes actually stored in the archive.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            self.position += size
            return NUL * size
#class _FileInFile
1185+
1186+
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Number of bytes fetched per step while searching for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        # Data already fetched by readline() but not yet handed out.
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Follow the file-object convention that a negative size means
        # "no limit"; slicing with it would silently drop data.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # None and any negative value mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (absolute), 1 (relative to the current
           position) or 2 (relative to the end); anything else raises
           ValueError.  The result is clamped to the range 0..size.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == 0:
            self.position = min(max(pos, 0), self.size)
        elif whence == 1:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == 2:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Read-ahead data belongs to the old position; drop it.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
1314
1315 #------------------
1316 # Exported Classes
1317 #------------------
1318-class TarInfo:
1319+class TarInfo(object):
1320 """Informational class which holds the details about an
1321 archive member given by a tar header block.
1322- TarInfo instances are returned by TarFile.getmember() and
1323- TarFile.getmembers() and are usually created internally.
1324- If you want to create a TarInfo instance from the outside,
1325- you should use TarFile.gettarinfo() if the file already exists,
1326- or you can instanciate the class yourself.
1327+ TarInfo objects are returned by TarFile.getmember(),
1328+ TarFile.getmembers() and TarFile.gettarinfo() and are
1329+ usually created internally.
1330 """
1331
1332 def __init__(self, name=""):
1333- """Construct a TarInfo instance. name is the optional name
1334+ """Construct a TarInfo object. name is the optional name
1335 of the member.
1336 """
1337-
1338- self.name = name # member name (dirnames must end with '/')
1339- self.mode = 0100666 # file permissions
1340- self.uid = 0 # user id
1341- self.gid = 0 # group id
1342- self.size = 0 # file size
1343- self.mtime = 0 # modification time
1344- self.chksum = 0 # header checksum
1345- self.type = REGTYPE # member type
1346- self.linkname = "" # link name
1347- self.uname = "user" # user name
1348- self.gname = "group" # group name
1349- self.devmajor = 0 #-
1350- self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
1351- self.prefix = "" # prefix, holding information
1352-# # about sparse files
1353-
1354- self.offset = 0 # the tar header starts here
1355- self.offset_data = 0 # the optional file's data starts here
1356-
1357- def init_from_stat(self, statres):
1358- """Initialize various attributes from statobj (these are
1359- returned by os.stat() and related functions. Return none on error"""
1360- stmd = statres.st_mode
1361- if stat.S_ISREG(stmd): type = REGTYPE
1362- elif stat.S_ISDIR(stmd):
1363- type = DIRTYPE
1364- if self.name[-1:] != "/": self.name += "/"
1365- elif stat.S_ISFIFO(stmd): type = FIFOTYPE
1366- elif stat.S_ISLNK(stmd): type = SYMTYPE
1367- elif stat.S_ISCHR(stmd): type = CHRTYPE
1368- elif stat.S_ISBLK(stmd): type = BLKTYPE
1369- else: return None
1370-
1371- # Fill the TarInfo instance with all
1372- # information we can get.
1373- self.mode = stat.S_IMODE(stmd)
1374- self.uid = statres.st_uid
1375- self.gid = statres.st_gid
1376- self.size = statres.st_size
1377- self.mtime = statres.st_mtime
1378- self.type = type
1379- if pwd:
1380- try: self.uname = uid2uname(self.uid)
1381- except KeyError: pass
1382- if grp:
1383- try: self.gname = gid2gname(self.gid)
1384- except KeyError: pass
1385-
1386- if type in (CHRTYPE, BLKTYPE):
1387- if hasattr(os, "major") and hasattr(os, "minor"):
1388- self.devmajor = os.major(statres.st_rdev)
1389- self.devminor = os.minor(statres.st_rdev)
1390- return 1
1391-
1392- def set_arcname(self, name):
1393- """Set the name of the member in the archive. Backward
1394- slashes are converted to forward slashes, Absolute paths are
1395- turned to relative paths.
1396- """
1397- arcname = normpath(name)
1398- drv, arcname = os.path.splitdrive(arcname) #@UnusedVariable
1399- while arcname[0:1] == "/":
1400- arcname = arcname[1:]
1401- self.name = arcname
1402-
1403- def getheader(self):
1404- """Return a tar header block as a 512 byte string.
1405- """
1406- if self.uid > 2097151 or self.uid < 0:
1407- sys.stderr.write("uid %i of file %s not in range. Setting uid to 60001\n" % (self.uid,self.name))
1408- self.uid = 60001
1409- if self.gid > 2097151 or self.gid < 0:
1410- sys.stderr.write("gid %i of file %s not in range. Setting gid to 60001\n" % (self.gid, self.name))
1411- self.gid = 60001
1412- # The following code was contributed by Detlef Lannert.
1413- parts = []
1414- for value, fieldsize in (
1415- (self.name, 100),
1416- ("%07o" % self.mode, 8),
1417- ("%07o" % self.uid, 8),
1418- ("%07o" % self.gid, 8),
1419- ("%011o" % self.size, 12),
1420- ("%011o" % self.mtime, 12),
1421- (" ", 8),
1422- (self.type, 1),
1423- (self.linkname, 100),
1424- (MAGIC, 6),
1425- (VERSION, 2),
1426- (self.uname, 32),
1427- (self.gname, 32),
1428- ("%07o" % self.devmajor, 8),
1429- ("%07o" % self.devminor, 8),
1430- (self.prefix, 155)
1431- ):
1432- l = len(value)
1433- parts.append(value + (fieldsize - l) * "\0")
1434-
1435- buf = "".join(parts)
1436- chksum = calc_chksum(buf)
1437- buf = buf[:148] + "%06o\0" % chksum + buf[155:]
1438- buf += (512 - len(buf)) * "\0"
1439- self.buf = buf
1440+ self.name = name # member name
1441+ self.mode = 0644 # file permissions
1442+ self.uid = 0 # user id
1443+ self.gid = 0 # group id
1444+ self.size = 0 # file size
1445+ self.mtime = 0 # modification time
1446+ self.chksum = 0 # header checksum
1447+ self.type = REGTYPE # member type
1448+ self.linkname = "" # link name
1449+ self.uname = "" # user name
1450+ self.gname = "" # group name
1451+ self.devmajor = 0 # device major number
1452+ self.devminor = 0 # device minor number
1453+
1454+ self.offset = 0 # the tar header starts here
1455+ self.offset_data = 0 # the file's data starts here
1456+
1457+ self.pax_headers = {} # pax header information
1458+
# Pax headers call the "name" and "linkname" fields "path" and
# "linkpath".  Expose both spellings; each property simply reads
# and writes the classic attribute.
def _getpath(self):
    return self.name
def _setpath(self, name):
    self.name = name
path = property(_getpath, _setpath)

def _getlinkpath(self):
    return self.linkname
def _setlinkpath(self, linkname):
    self.linkname = linkname
linkpath = property(_getlinkpath, _setlinkpath)
1472+
def __repr__(self):
    """Return "<ClassName 'member-name' at 0x...>" for debugging."""
    cls_name = self.__class__.__name__
    return "<%s %r at %#x>" % (cls_name, self.name, id(self))
1475+
1476+ def get_info(self, encoding, errors):
1477+ """Return the TarInfo's attributes as a dictionary.
1478+ """
1479+ info = {
1480+ "name": self.name,
1481+ "mode": self.mode & 07777,
1482+ "uid": self.uid,
1483+ "gid": self.gid,
1484+ "size": self.size,
1485+ "mtime": self.mtime,
1486+ "chksum": self.chksum,
1487+ "type": self.type,
1488+ "linkname": self.linkname,
1489+ "uname": self.uname,
1490+ "gname": self.gname,
1491+ "devmajor": self.devmajor,
1492+ "devminor": self.devminor
1493+ }
1494+
1495+ if info["type"] == DIRTYPE and not info["name"].endswith("/"):
1496+ info["name"] += "/"
1497+
1498+ for key in ("name", "linkname", "uname", "gname"):
1499+ if type(info[key]) is unicode:
1500+ info[key] = info[key].encode(encoding, errors)
1501+
1502+ return info
1503+
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Return a tar header as a string of 512 byte blocks.

       format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
       or PAX_FORMAT); any other value raises ValueError.
    """
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding, errors)
    raise ValueError("invalid format")
1517+
def create_ustar_header(self, info):
    """Return the object as a ustar header block.

       A link name longer than LENGTH_LINK cannot be stored and
       raises ValueError; a name longer than LENGTH_NAME is split
       into the prefix and name fields.
    """
    info["magic"] = POSIX_MAGIC

    if len(info["linkname"]) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    if len(info["name"]) > LENGTH_NAME:
        prefix, name = self._posix_split_name(info["name"])
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT)
1530+
def create_gnu_header(self, info):
    """Return the object as a GNU header block sequence.

       Over-long link names and member names are emitted first as
       GNU long-link / long-name blocks, followed by the regular
       header block.
    """
    info["magic"] = GNU_MAGIC

    blocks = []
    if len(info["linkname"]) > LENGTH_LINK:
        blocks.append(self._create_gnu_long_header(info["linkname"],
                                                   GNUTYPE_LONGLINK))

    if len(info["name"]) > LENGTH_NAME:
        blocks.append(self._create_gnu_long_header(info["name"],
                                                   GNUTYPE_LONGNAME))

    return "".join(blocks) + self._create_header(info, GNU_FORMAT)
1544+
def create_pax_header(self, info, encoding, errors):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields move into the pax header when they do not fit
    # their ustar field or are not plain ASCII.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32))
    for name, hname, length in string_fields:
        if hname in pax_headers:
            # The pax header has priority.
            continue

        val = info[name].decode(encoding, errors)

        # Try to encode the string as ASCII.
        try:
            val.encode("ascii")
        except UnicodeEncodeError:
            pax_headers[hname] = val
            continue

        if len(info[name]) > length:
            pax_headers[hname] = val

    # Number fields move into the pax header when they exceed the
    # octal field width, are negative, or are floats.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for name, digits in number_fields:
        if name in pax_headers:
            # The pax header has priority. Avoid overflow.
            info[name] = 0
            continue

        val = info[name]
        out_of_range = not 0 <= val < 8 ** (digits - 1)
        if out_of_range or isinstance(val, float):
            pax_headers[name] = unicode(val)
            info[name] = 0

    # Create a pax extended header if necessary.
    buf = ""
    if pax_headers:
        buf = self._create_pax_generic_header(pax_headers)

    return buf + self._create_header(info, USTAR_FORMAT)
1595+
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return a pax global header block sequence built from the
       given keyword/value mapping.
    """
    header = cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    return header
1601+
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

       The split happens at the last "/" within the first
       LENGTH_PREFIX + 1 characters.  ValueError is raised if there
       is no such slash or the remaining name is still longer than
       LENGTH_NAME.
    """
    # Length of the leading part including its final slash; 0 when
    # the candidate region contains no slash at all.
    cut = name[:LENGTH_PREFIX + 1].rfind("/") + 1

    prefix = name[:cut][:-1]
    name = name[cut:]

    if not prefix or len(name) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, name
1616+
1617+ @staticmethod
1618+ def _create_header(info, format):
1619+ """Return a header block. info is a dictionary with file
1620+ information, format must be one of the *_FORMAT constants.
1621+ """
1622+ parts = [
1623+ stn(info.get("name", ""), 100),
1624+ itn(info.get("mode", 0) & 07777, 8, format),
1625+ itn(info.get("uid", 0), 8, format),
1626+ itn(info.get("gid", 0), 8, format),
1627+ itn(info.get("size", 0), 12, format),
1628+ itn(info.get("mtime", 0), 12, format),
1629+ " ", # checksum field
1630+ info.get("type", REGTYPE),
1631+ stn(info.get("linkname", ""), 100),
1632+ stn(info.get("magic", POSIX_MAGIC), 8),
1633+ stn(info.get("uname", ""), 32),
1634+ stn(info.get("gname", ""), 32),
1635+ itn(info.get("devmajor", 0), 8, format),
1636+ itn(info.get("devminor", 0), 8, format),
1637+ stn(info.get("prefix", ""), 155)
1638+ ]
1639+
1640+ buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1641+ chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1642+ buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1643 return buf
1644
@staticmethod
def _create_payload(payload):
    """Return the string payload filled with zero bytes
       up to the next 512 byte border.  A payload that already
       ends on a block border is returned unchanged.
    """
    padding = -len(payload) % BLOCKSIZE
    if padding > 0:
        payload += padding * NUL
    return payload
1654+
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name: a pseudo header followed by the NUL-terminated
       name padded to whole blocks.
    """
    name += NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),
        "magic": GNU_MAGIC,
    }

    # create extended header + name blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(name)
1671+
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Return a POSIX.1-2001 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be unicode objects.
    """
    records = []
    for keyword, value in pax_headers.iteritems():
        keyword = keyword.encode("utf8")
        value = value.encode("utf8")
        base = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
        # Each record starts with its own total length, which in turn
        # includes the digits of that length.  Iterate until the
        # number stops changing.
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        records.append("%d %s=%s\n" % (total, keyword, value))
    records = "".join(records)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Create pax header + record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
1703+
@classmethod
def frombuf(cls, buf):
    """Construct a TarInfo object from a 512 byte string buffer.

       Raises EmptyHeaderError for an empty buffer,
       TruncatedHeaderError for a buffer of the wrong length,
       EOFHeaderError for an all-NUL block and InvalidHeaderError
       when the stored checksum does not match.
    """
    length = len(buf)
    if length == 0:
        raise EmptyHeaderError("empty header")
    if length != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    obj = cls()
    obj.buf = buf
    obj.name = nts(buf[0:100])
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257])
    obj.uname = nts(buf[265:297])
    obj.gname = nts(buf[297:329])
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500])

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1749+
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile: read one header block, parse it, record where it
       started and let the type-specific processing finish the job.
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    obj = cls.frombuf(header)
    obj.offset = fileobj.tell() - BLOCKSIZE
    return obj._proc_member(tarfile)
1759+
1760+ #--------------------------------------------------------------------------
1761+ # The following are methods that are called depending on the type of a
1762+ # member. The entry point is _proc_member() which can be overridden in a
1763+ # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1764+ # implement the following
1765+ # operations:
1766+ # 1. Set self.offset_data to the position where the data blocks begin,
1767+ # if there is data that follows.
1768+ # 2. Set tarfile.offset to the position where the next member's header will
1769+ # begin.
1770+ # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.  Anything that is not a GNU long
       name/link, GNU sparse or pax header is handled as a builtin
       type.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if kind == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    return self._proc_builtin(tarfile)
1783+
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    next_header = data_start
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Skip the following data blocks.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding,
                         tarfile.errors)

    return self
1800+
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.  The long string is read from the data
       blocks and stored on the member described by the header that
       follows, which is what gets returned.
    """
    longname = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(longname)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longname)

    return member
1822+
1823+ def _proc_sparse(self, tarfile):
1824+ """Process a GNU sparse header plus extra headers.
1825+ """
1826+ buf = self.buf
1827+ sp = _ringbuffer()
1828+ pos = 386
1829+ lastpos = 0L
1830+ realpos = 0L
1831+ # There are 4 possible sparse structs in the
1832+ # first header.
1833+ for i in xrange(4):
1834+ try:
1835+ offset = nti(buf[pos:pos + 12])
1836+ numbytes = nti(buf[pos + 12:pos + 24])
1837+ except ValueError:
1838+ break
1839+ if offset > lastpos:
1840+ sp.append(_hole(lastpos, offset - lastpos))
1841+ sp.append(_data(offset, numbytes, realpos))
1842+ realpos += numbytes
1843+ lastpos = offset + numbytes
1844+ pos += 24
1845+
1846+ isextended = ord(buf[482])
1847+ origsize = nti(buf[483:495])
1848+
1849+ # If the isextended flag is given,
1850+ # there are extra headers to process.
1851+ while isextended == 1:
1852+ buf = tarfile.fileobj.read(BLOCKSIZE)
1853+ pos = 0
1854+ for i in xrange(21):
1855+ try:
1856+ offset = nti(buf[pos:pos + 12])
1857+ numbytes = nti(buf[pos + 12:pos + 24])
1858+ except ValueError:
1859+ break
1860+ if offset > lastpos:
1861+ sp.append(_hole(lastpos, offset - lastpos))
1862+ sp.append(_data(offset, numbytes, realpos))
1863+ realpos += numbytes
1864+ lastpos = offset + numbytes
1865+ pos += 24
1866+ isextended = ord(buf[504])
1867+
1868+ if lastpos < origsize:
1869+ sp.append(_hole(lastpos, origsize - lastpos))
1870+
1871+ self.sparse = sp
1872+
1873+ self.offset_data = tarfile.fileobj.tell()
1874+ tarfile.offset = self.offset_data + self._block(self.size)
1875+ self.size = origsize
1876+
1877+ return self
1878+
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).  Global records update the archive's dictionary in
    # place; extended ones work on a private copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    record = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    match = record.match(buf, pos)
    while match:
        length, keyword = match.groups()
        length = int(length)
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length
        match = record.match(buf, pos)

    # Fetch the next header.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        member._apply_pax_info(pax_headers, tarfile.encoding,
                               tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                offset += member._block(member.size)
            tarfile.offset = offset

    return member
1936+
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Replace fields with supplemental information from a previous
       pax extended or global header.  Keywords outside PAX_FIELDS
       are ignored; a copy of pax_headers is kept on the object.
    """
    for keyword, value in pax_headers.iteritems():
        if keyword not in PAX_FIELDS:
            continue

        if keyword == "path":
            value = value.rstrip("/")

        if keyword not in PAX_NUMBER_FIELDS:
            value = uts(value, encoding, errors)
        else:
            # Numeric fields that cannot be converted fall back to 0.
            convert = PAX_NUMBER_FIELDS[keyword]
            try:
                value = convert(value)
            except ValueError:
                value = 0

        setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
1959+
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    if rest:
        # A partial block still occupies a full one in the archive.
        whole += 1
    return whole * BLOCKSIZE
1968+
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES
1971 def isfile(self):
1972@@ -444,10 +1470,10 @@
1973 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1974 # class TarInfo
1975
1976-
1977-class TarFile:
1978- """Class representing a TAR archive file on disk.
1979+class TarFile(object):
1980+ """The TarFile Class provides an interface to tar archives.
1981 """
1982+
1983 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1984
1985 dereference = False # If true, add content of linked file to the
1986@@ -456,235 +1482,459 @@
1987 ignore_zeros = False # If true, skips empty or invalid blocks and
1988 # continues processing.
1989
1990- errorlevel = 0 # If 0, fatal errors only appear in debug
1991+ errorlevel = 1 # If 0, fatal errors only appear in debug
1992 # messages (if debug >= 0). If > 0, errors
1993 # are passed to the caller as exceptions.
1994
1995- def __init__(self, name=None, mode="r", fileobj=None):
1996- self.name = name
1997-
1998+ format = DEFAULT_FORMAT # The format to use when creating an archive.
1999+
2000+ encoding = ENCODING # Encoding for 8-bit character strings.
2001+
2002+ errors = None # Error handler for unicode conversion.
2003+
2004+ tarinfo = TarInfo # The default TarInfo class to use.
2005+
2006+ fileobject = ExFileObject # The default ExFileObject class to use.
2007+
2008+ def __init__(self, name=None, mode="r", fileobj=None, format=None,
2009+ tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
2010+ errors=None, pax_headers=None, debug=None, errorlevel=None):
2011+ """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
2012+ read from an existing archive, 'a' to append data to an existing
2013+ file or 'w' to create a new file overwriting an existing one. `mode'
2014+ defaults to 'r'.
2015+ If `fileobj' is given, it is used for reading or writing data. If it
2016+ can be determined, `mode' is overridden by `fileobj's mode.
2017+ `fileobj' is not closed, when TarFile is closed.
2018+ """
2019 if len(mode) > 1 or mode not in "raw":
2020- raise ValueError, "mode must be either 'r', 'a' or 'w', " \
2021- "not '%s'" % mode
2022- self._mode = mode
2023- self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
2024+ raise ValueError("mode must be 'r', 'a' or 'w'")
2025+ self.mode = mode
2026+ self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
2027
2028 if not fileobj:
2029- fileobj = __builtin__.file(self.name, self.mode)
2030- self._extfileobj = 0
2031+ if self.mode == "a" and not os.path.exists(name):
2032+ # Create nonexistent files in append mode.
2033+ self.mode = "w"
2034+ self._mode = "wb"
2035+ fileobj = bltn_open(name, self._mode)
2036+ self._extfileobj = False
2037 else:
2038- if self.name is None and hasattr(fileobj, "name"):
2039- self.name = fileobj.name
2040+ if name is None and hasattr(fileobj, "name"):
2041+ name = fileobj.name
2042 if hasattr(fileobj, "mode"):
2043- self.mode = fileobj.mode
2044- self._extfileobj = 1
2045+ self._mode = fileobj.mode
2046+ self._extfileobj = True
2047+ if name:
2048+ self.name = os.path.abspath(name)
2049+ else:
2050+ self.name = None
2051 self.fileobj = fileobj
2052
2053- self.init_datastructures()
2054-
2055- if self._mode == "a":
2056- self.fileobj.seek(0)
2057- self._load()
2058-
2059- def init_datastructures(self):
2060- # Init datastructures
2061- #self.members = [] # list of members as TarInfo instances
2062- #self.membernames = [] # names of members
2063- #self.chunks = [0] # chunk cache
2064- self._loaded = 0 # flag if all members have been read
2065- self.offset = 0l # current position in the archive file
2066- self.inodes = {} # dictionary caching the inodes of
2067- # archive members already added
2068- self.next_chunk = 0 # offset of next tarinfo, used when reading
2069-
2070- def close(self):
2071- """Close the TarFile instance and do some cleanup.
2072- """
2073- if self.fileobj:
2074- if self._mode in "aw":
2075- # fill up the end with zero-blocks
2076- # (like option -b20 for tar does)
2077- blocks, remainder = divmod(self.offset, RECORDSIZE) #@UnusedVariable
2078- if remainder > 0:
2079- self.fileobj.write("\0" * (RECORDSIZE - remainder))
2080-
2081+ # Init attributes.
2082+ if format is not None:
2083+ self.format = format
2084+ if tarinfo is not None:
2085+ self.tarinfo = tarinfo
2086+ if dereference is not None:
2087+ self.dereference = dereference
2088+ if ignore_zeros is not None:
2089+ self.ignore_zeros = ignore_zeros
2090+ if encoding is not None:
2091+ self.encoding = encoding
2092+
2093+ if errors is not None:
2094+ self.errors = errors
2095+ elif mode == "r":
2096+ self.errors = "utf-8"
2097+ else:
2098+ self.errors = "strict"
2099+
2100+ if pax_headers is not None and self.format == PAX_FORMAT:
2101+ self.pax_headers = pax_headers
2102+ else:
2103+ self.pax_headers = {}
2104+
2105+ if debug is not None:
2106+ self.debug = debug
2107+ if errorlevel is not None:
2108+ self.errorlevel = errorlevel
2109+
2110+ # Init datastructures.
2111+ self.closed = False
2112+ self.members = [] # list of members as TarInfo objects
2113+ self._loaded = False # flag if all members have been read
2114+ self.offset = self.fileobj.tell()
2115+ # current position in the archive file
2116+ self.inodes = {} # dictionary caching the inodes of
2117+ # archive members already added
2118+
2119+ try:
2120+ if self.mode == "r":
2121+ self.firstmember = None
2122+ self.firstmember = self.next()
2123+
2124+ if self.mode == "a":
2125+ # Move to the end of the archive,
2126+ # before the first empty block.
2127+ while True:
2128+ self.fileobj.seek(self.offset)
2129+ try:
2130+ tarinfo = self.tarinfo.fromtarfile(self)
2131+ self.members.append(tarinfo)
2132+ except EOFHeaderError:
2133+ self.fileobj.seek(self.offset)
2134+ break
2135+ except HeaderError, e:
2136+ raise ReadError(str(e))
2137+
2138+ if self.mode in "aw":
2139+ self._loaded = True
2140+
2141+ if self.pax_headers:
2142+ buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
2143+ self.fileobj.write(buf)
2144+ self.offset += len(buf)
2145+ except:
2146 if not self._extfileobj:
2147 self.fileobj.close()
2148- self.fileobj = None
2149-
2150- def throwaway_until(self, position):
2151- """Read data, throwing it away until we get to position"""
2152- bufsize = 16 * 1024
2153- bytes_to_read = position - self.offset
2154- assert bytes_to_read >= 0
2155- while bytes_to_read >= bufsize:
2156- self.fileobj.read(bufsize)
2157- bytes_to_read -= bufsize
2158- self.fileobj.read(bytes_to_read)
2159- self.offset = position
2160-
2161- def next(self):
2162- """Return the next member from the archive.
2163- Return None if the end is reached.
2164- Can be used in a while statement, is used
2165- for Iteration (see __iter__()) and internally.
2166+ self.closed = True
2167+ raise
2168+
2169+ def _getposix(self):
2170+ return self.format == USTAR_FORMAT
2171+ def _setposix(self, value):
2172+ import warnings
2173+ warnings.warn("use the format attribute instead", DeprecationWarning,
2174+ 2)
2175+ if value:
2176+ self.format = USTAR_FORMAT
2177+ else:
2178+ self.format = GNU_FORMAT
2179+ posix = property(_getposix, _setposix)
2180+
2181+ #--------------------------------------------------------------------------
2182+ # Below are the classmethods which act as alternate constructors to the
2183+ # TarFile class. The open() method is the only one that is needed for
2184+ # public use; it is the "super"-constructor and is able to select an
2185+ # adequate "sub"-constructor for a particular compression using the mapping
2186+ # from OPEN_METH.
2187+ #
2188+ # This concept allows one to subclass TarFile without losing the comfort of
2189+ # the super-constructor. A sub-constructor is registered and made available
2190+ # by adding it to the mapping in OPEN_METH.
2191+
2192+ @classmethod
2193+ def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
2194+ """Open a tar archive for reading, writing or appending. Return
2195+ an appropriate TarFile class.
2196+
2197+ mode:
2198+ 'r' or 'r:*' open for reading with transparent compression
2199+ 'r:' open for reading exclusively uncompressed
2200+ 'r:gz' open for reading with gzip compression
2201+ 'r:bz2' open for reading with bzip2 compression
2202+ 'a' or 'a:' open for appending, creating the file if necessary
2203+ 'w' or 'w:' open for writing without compression
2204+ 'w:gz' open for writing with gzip compression
2205+ 'w:bz2' open for writing with bzip2 compression
2206+
2207+ 'r|*' open a stream of tar blocks with transparent compression
2208+ 'r|' open an uncompressed stream of tar blocks for reading
2209+ 'r|gz' open a gzip compressed stream of tar blocks
2210+ 'r|bz2' open a bzip2 compressed stream of tar blocks
2211+ 'w|' open an uncompressed stream for writing
2212+ 'w|gz' open a gzip compressed stream for writing
2213+ 'w|bz2' open a bzip2 compressed stream for writing
2214 """
2215- if not self.fileobj:
2216- raise ValueError, "I/O operation on closed file"
2217- if self._mode not in "ra":
2218- raise ValueError, "reading from a write-mode file"
2219-
2220- # Read the next block.
2221- # self.fileobj.seek(self.chunks[-1])
2222- #self.fileobj.seek(self.next_chunk)
2223- #self.offset = self.next_chunk
2224- self.throwaway_until(self.next_chunk)
2225- while 1:
2226- buf = self.fileobj.read(BLOCKSIZE)
2227- if not buf:
2228- return None
2229- try:
2230- tarinfo = self._buftoinfo(buf)
2231- except ValueError:
2232- if self.ignore_zeros:
2233- if buf.count("\0") == BLOCKSIZE:
2234- adj = "empty"
2235- else:
2236- adj = "invalid"
2237- self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
2238- self.offset += BLOCKSIZE
2239+
2240+ if not name and not fileobj:
2241+ raise ValueError("nothing to open")
2242+
2243+ if mode in ("r", "r:*"):
2244+ # Find out which *open() is appropriate for opening the file.
2245+ for comptype in cls.OPEN_METH:
2246+ func = getattr(cls, cls.OPEN_METH[comptype])
2247+ if fileobj is not None:
2248+ saved_pos = fileobj.tell()
2249+ try:
2250+ return func(name, "r", fileobj, **kwargs)
2251+ except (ReadError, CompressionError), e:
2252+ if fileobj is not None:
2253+ fileobj.seek(saved_pos)
2254 continue
2255- else:
2256- return None
2257- break
2258-
2259- # If the TarInfo instance contains a GNUTYPE longname or longlink
2260- # statement, we must process this first.
2261- if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
2262- tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
2263- if not tarinfo:
2264- return None
2265-
2266- if tarinfo.issparse():
2267- assert 0, "Sparse file support turned off"
2268- # Sparse files need some care,
2269- # due to the possible extra headers.
2270- tarinfo.offset = self.offset
2271- self.offset += BLOCKSIZE
2272- origsize = self._proc_sparse(tarinfo)
2273- tarinfo.offset_data = self.offset
2274- blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2275- if remainder:
2276- blocks += 1
2277- self.offset += blocks * BLOCKSIZE
2278- tarinfo.size = origsize
2279+ raise ReadError("file could not be opened successfully")
2280+
2281+ elif ":" in mode:
2282+ filemode, comptype = mode.split(":", 1)
2283+ filemode = filemode or "r"
2284+ comptype = comptype or "tar"
2285+
2286+ # Select the *open() function according to
2287+ # given compression.
2288+ if comptype in cls.OPEN_METH:
2289+ func = getattr(cls, cls.OPEN_METH[comptype])
2290+ else:
2291+ raise CompressionError("unknown compression type %r" % comptype)
2292+ return func(name, filemode, fileobj, **kwargs)
2293+
2294+ elif "|" in mode:
2295+ filemode, comptype = mode.split("|", 1)
2296+ filemode = filemode or "r"
2297+ comptype = comptype or "tar"
2298+
2299+ if filemode not in "rw":
2300+ raise ValueError("mode must be 'r' or 'w'")
2301+
2302+ t = cls(name, filemode,
2303+ _Stream(name, filemode, comptype, fileobj, bufsize),
2304+ **kwargs)
2305+ t._extfileobj = False
2306+ return t
2307+
2308+ elif mode in "aw":
2309+ return cls.taropen(name, mode, fileobj, **kwargs)
2310+
2311+ raise ValueError("undiscernible mode")
2312+
2313+ @classmethod
2314+ def taropen(cls, name, mode="r", fileobj=None, **kwargs):
2315+ """Open uncompressed tar archive name for reading or writing.
2316+ """
2317+ if len(mode) > 1 or mode not in "raw":
2318+ raise ValueError("mode must be 'r', 'a' or 'w'")
2319+ return cls(name, mode, fileobj, **kwargs)
2320+
2321+ @classmethod
2322+ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
2323+ """Open gzip compressed tar archive name for reading or writing.
2324+ Appending is not allowed.
2325+ """
2326+ if len(mode) > 1 or mode not in "rw":
2327+ raise ValueError("mode must be 'r' or 'w'")
2328+
2329+ try:
2330+ import gzip
2331+ gzip.GzipFile
2332+ except (ImportError, AttributeError):
2333+ raise CompressionError("gzip module is not available")
2334+
2335+ if fileobj is None:
2336+ fileobj = bltn_open(name, mode + "b")
2337+
2338+ try:
2339+ t = cls.taropen(name, mode,
2340+ gzip.GzipFile(name, mode, compresslevel, fileobj),
2341+ **kwargs)
2342+ except IOError:
2343+ raise ReadError("not a gzip file")
2344+ t._extfileobj = False
2345+ return t
2346+
2347+ @classmethod
2348+ def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
2349+ """Open bzip2 compressed tar archive name for reading or writing.
2350+ Appending is not allowed.
2351+ """
2352+ if len(mode) > 1 or mode not in "rw":
2353+ raise ValueError("mode must be 'r' or 'w'.")
2354+
2355+ try:
2356+ import bz2
2357+ except ImportError:
2358+ raise CompressionError("bz2 module is not available")
2359+
2360+ if fileobj is not None:
2361+ fileobj = _BZ2Proxy(fileobj, mode)
2362 else:
2363- tarinfo.offset = self.offset
2364- self.offset += BLOCKSIZE
2365- tarinfo.offset_data = self.offset
2366- if tarinfo.isreg():
2367- ## Skip the following data blocks.
2368- blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2369- if remainder:
2370- blocks += 1
2371- self.next_chunk = self.offset + (blocks * BLOCKSIZE)
2372- else: self.next_chunk = self.offset
2373-
2374- #self.members.append(tarinfo) These use too much memory
2375- #self.membernames.append(tarinfo.name)
2376- #self.chunks.append(self.offset)
2377- return tarinfo
2378+ fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
2379+
2380+ try:
2381+ t = cls.taropen(name, mode, fileobj, **kwargs)
2382+ except (IOError, EOFError):
2383+ raise ReadError("not a bzip2 file")
2384+ t._extfileobj = False
2385+ return t
2386+
2387+ # All *open() methods are registered here.
2388+ OPEN_METH = {
2389+ "tar": "taropen", # uncompressed tar
2390+ "gz": "gzopen", # gzip compressed tar
2391+ "bz2": "bz2open" # bzip2 compressed tar
2392+ }
2393+
2394+ #--------------------------------------------------------------------------
2395+ # The public methods which TarFile provides:
2396+
2397+ def close(self):
2398+ """Close the TarFile. In write-mode, two finishing zero blocks are
2399+ appended to the archive.
2400+ """
2401+ if self.closed:
2402+ return
2403+
2404+ if self.mode in "aw":
2405+ self.fileobj.write(NUL * (BLOCKSIZE * 2))
2406+ self.offset += (BLOCKSIZE * 2)
2407+ # fill up the end with zero-blocks
2408+ # (like option -b20 for tar does)
2409+ blocks, remainder = divmod(self.offset, RECORDSIZE)
2410+ if remainder > 0:
2411+ self.fileobj.write(NUL * (RECORDSIZE - remainder))
2412+
2413+ if not self._extfileobj:
2414+ self.fileobj.close()
2415+ self.closed = True
2416
2417 def getmember(self, name):
2418- """Return a TarInfo instance for member name.
2419- """
2420- if name not in self.membernames and not self._loaded:
2421- self._load()
2422- if name not in self.membernames:
2423- raise KeyError, "filename `%s' not found in tar archive" % name
2424- return self._getmember(name)
2425-
2426- def getinfo(self, name):
2427- """Return a TarInfo instance for member name.
2428- This method will be deprecated in 0.6,
2429- use getmember() instead.
2430- """
2431- # XXX kick this out in 0.6
2432- import warnings
2433- warnings.warn("use getmember() instead", DeprecationWarning)
2434- return self.getmember(name)
2435+ """Return a TarInfo object for member `name'. If `name' can not be
2436+ found in the archive, KeyError is raised. If a member occurs more
2437+ than once in the archive, its last occurrence is assumed to be the
2438+ most up-to-date version.
2439+ """
2440+ tarinfo = self._getmember(name)
2441+ if tarinfo is None:
2442+ raise KeyError("filename %r not found" % name)
2443+ return tarinfo
2444
2445 def getmembers(self):
2446- """Return a list of all members in the archive
2447- (as TarInfo instances).
2448+ """Return the members of the archive as a list of TarInfo objects. The
2449+ list has the same order as the members in the archive.
2450 """
2451+ self._check()
2452 if not self._loaded: # if we want to obtain a list of
2453 self._load() # all members, we first have to
2454 # scan the whole archive.
2455 return self.members
2456
2457 def getnames(self):
2458- """Return a list of names of all members in the
2459- archive.
2460- """
2461- if not self._loaded:
2462- self._load()
2463- return self.membernames
2464-
2465- def gettarinfo(self, name, arcname=None):
2466- """Create a TarInfo instance from an existing file.
2467- Optional arcname defines the name under which the file
2468- shall be stored in the archive.
2469- """
2470- # Now, fill the TarInfo instance with
2471+ """Return the members of the archive as a list of their names. It has
2472+ the same order as the list returned by getmembers().
2473+ """
2474+ return [tarinfo.name for tarinfo in self.getmembers()]
2475+
2476+ def gettarinfo(self, name=None, arcname=None, fileobj=None):
2477+ """Create a TarInfo object for either the file `name' or the file
2478+ object `fileobj' (using os.fstat on its file descriptor). You can
2479+ modify some of the TarInfo's attributes before you add it using
2480+ addfile(). If given, `arcname' specifies an alternative name for the
2481+ file in the archive.
2482+ """
2483+ self._check("aw")
2484+
2485+ # When fileobj is given, replace name by
2486+ # fileobj's real name.
2487+ if fileobj is not None:
2488+ name = fileobj.name
2489+
2490+ # Building the name of the member in the archive.
2491+ # Backward slashes are converted to forward slashes,
2492+ # Absolute paths are turned to relative paths.
2493+ if arcname is None:
2494+ arcname = name
2495+ drv, arcname = os.path.splitdrive(arcname)
2496+ arcname = arcname.replace(os.sep, "/")
2497+ arcname = arcname.lstrip("/")
2498+
2499+ # Now, fill the TarInfo object with
2500 # information specific for the file.
2501- tarinfo = TarInfo()
2502-
2503- if arcname is None: tarinfo.set_arcname(name)
2504- else: tarinfo.set_arcname(arcname)
2505+ tarinfo = self.tarinfo()
2506+ tarinfo.tarfile = self
2507
2508 # Use os.stat or os.lstat, depending on platform
2509 # and if symlinks shall be resolved.
2510- if hasattr(os, "lstat") and not self.dereference:
2511- statres = os.lstat(name)
2512+ if fileobj is None:
2513+ if hasattr(os, "lstat") and not self.dereference:
2514+ statres = os.lstat(name)
2515+ else:
2516+ statres = os.stat(name)
2517 else:
2518- statres = os.stat(name)
2519-
2520- if not tarinfo.init_from_stat(statres): return None
2521-
2522- if tarinfo.type == REGTYPE:
2523- inode = (statres.st_ino, statres.st_dev, statres.st_mtime)
2524- if inode in self.inodes.keys() and not self.dereference:
2525+ statres = os.fstat(fileobj.fileno())
2526+ linkname = ""
2527+
2528+ stmd = statres.st_mode
2529+ if stat.S_ISREG(stmd):
2530+ inode = (statres.st_ino, statres.st_dev)
2531+ if not self.dereference and statres.st_nlink > 1 and \
2532+ inode in self.inodes and arcname != self.inodes[inode]:
2533 # Is it a hardlink to an already
2534 # archived file?
2535- tarinfo.type = LNKTYPE
2536- tarinfo.linkname = self.inodes[inode]
2537+ type = LNKTYPE
2538+ linkname = self.inodes[inode]
2539 else:
2540 # The inode is added only if its valid.
2541 # For win32 it is always 0.
2542- if inode[0]: self.inodes[inode] = tarinfo.name
2543- elif tarinfo.type == SYMTYPE:
2544- tarinfo.linkname = os.readlink(name)
2545- tarinfo.size = 0
2546-
2547+ type = REGTYPE
2548+ if inode[0]:
2549+ self.inodes[inode] = arcname
2550+ elif stat.S_ISDIR(stmd):
2551+ type = DIRTYPE
2552+ elif stat.S_ISFIFO(stmd):
2553+ type = FIFOTYPE
2554+ elif stat.S_ISLNK(stmd):
2555+ type = SYMTYPE
2556+ linkname = os.readlink(name)
2557+ elif stat.S_ISCHR(stmd):
2558+ type = CHRTYPE
2559+ elif stat.S_ISBLK(stmd):
2560+ type = BLKTYPE
2561+ else:
2562+ return None
2563+
2564+ # Fill the TarInfo object with all
2565+ # information we can get.
2566+ tarinfo.name = arcname
2567+ tarinfo.mode = stmd
2568+ tarinfo.uid = statres.st_uid
2569+ tarinfo.gid = statres.st_gid
2570+ if type == REGTYPE:
2571+ tarinfo.size = statres.st_size
2572+ else:
2573+ tarinfo.size = 0L
2574+ tarinfo.mtime = statres.st_mtime
2575+ tarinfo.type = type
2576+ tarinfo.linkname = linkname
2577+ if pwd:
2578+ try:
2579+ tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2580+ except KeyError:
2581+ pass
2582+ if grp:
2583+ try:
2584+ tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2585+ except KeyError:
2586+ pass
2587+
2588+ if type in (CHRTYPE, BLKTYPE):
2589+ if hasattr(os, "major") and hasattr(os, "minor"):
2590+ tarinfo.devmajor = os.major(statres.st_rdev)
2591+ tarinfo.devminor = os.minor(statres.st_rdev)
2592 return tarinfo
2593
2594- def list(self, verbose=1):
2595- """Print a formatted listing of the archive's
2596- contents to stdout.
2597+ def list(self, verbose=True):
2598+ """Print a table of contents to sys.stdout. If `verbose' is False, only
2599+ the names of the members are printed. If it is True, an `ls -l'-like
2600+ output is produced.
2601 """
2602+ self._check()
2603+
2604 for tarinfo in self:
2605 if verbose:
2606 print filemode(tarinfo.mode),
2607- print tarinfo.uname + "/" + tarinfo.gname,
2608+ print "%s/%s" % (tarinfo.uname or tarinfo.uid,
2609+ tarinfo.gname or tarinfo.gid),
2610 if tarinfo.ischr() or tarinfo.isblk():
2611- print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
2612+ print "%10s" % ("%d,%d" \
2613+ % (tarinfo.devmajor, tarinfo.devminor)),
2614 else:
2615 print "%10d" % tarinfo.size,
2616 print "%d-%02d-%02d %02d:%02d:%02d" \
2617- % time.gmtime(tarinfo.mtime)[:6],
2618+ % time.localtime(tarinfo.mtime)[:6],
2619
2620- print tarinfo.name,
2621+ if tarinfo.isdir():
2622+ print tarinfo.name + "/",
2623+ else:
2624+ print tarinfo.name,
2625
2626 if verbose:
2627 if tarinfo.issym():
2628@@ -693,254 +1943,290 @@
2629 print "link to", tarinfo.linkname,
2630 print
2631
2632- def add(self, name, arcname=None, recursive=1):
2633- """Add a file or a directory to the archive.
2634- Directory addition is recursive by default.
2635+ def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
2636+ """Add the file `name' to the archive. `name' may be any type of file
2637+ (directory, fifo, symbolic link, etc.). If given, `arcname'
2638+ specifies an alternative name for the file in the archive.
2639+ Directories are added recursively by default. This can be avoided by
2640+ setting `recursive' to False. `exclude' is a function that should
2641+ return True for each filename to be excluded. `filter' is a function
2642+ that expects a TarInfo object argument and returns the changed
2643+ TarInfo object, if it returns None the TarInfo object will be
2644+ excluded from the archive.
2645 """
2646- if not self.fileobj:
2647- raise ValueError, "I/O operation on closed file"
2648- if self._mode == "r":
2649- raise ValueError, "writing to a read-mode file"
2650+ self._check("aw")
2651
2652 if arcname is None:
2653 arcname = name
2654
2655+ # Exclude pathnames.
2656+ if exclude is not None:
2657+ import warnings
2658+ warnings.warn("use the filter argument instead",
2659+ DeprecationWarning, 2)
2660+ if exclude(name):
2661+ self._dbg(2, "tarfile: Excluded %r" % name)
2662+ return
2663+
2664 # Skip if somebody tries to archive the archive...
2665- if os.path.abspath(name) == os.path.abspath(self.name):
2666- self._dbg(2, "tarfile: Skipped `%s'\n" % name)
2667- return
2668-
2669- # Special case: The user wants to add the current
2670- # working directory.
2671- if name == ".":
2672- if recursive:
2673- if arcname == ".":
2674- arcname = ""
2675- for f in os.listdir("."):
2676- self.add(f, os.path.join(arcname, f))
2677- return
2678-
2679- self._dbg(1, "%s\n" % name)
2680-
2681- # Create a TarInfo instance from the file.
2682+ if self.name is not None and os.path.abspath(name) == self.name:
2683+ self._dbg(2, "tarfile: Skipped %r" % name)
2684+ return
2685+
2686+ self._dbg(1, name)
2687+
2688+ # Create a TarInfo object from the file.
2689 tarinfo = self.gettarinfo(name, arcname)
2690
2691 if tarinfo is None:
2692- self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
2693+ self._dbg(1, "tarfile: Unsupported type %r" % name)
2694+ return
2695
2696+ # Change or exclude the TarInfo object.
2697+ if filter is not None:
2698+ tarinfo = filter(tarinfo)
2699+ if tarinfo is None:
2700+ self._dbg(2, "tarfile: Excluded %r" % name)
2701+ return
2702
2703 # Append the tar header and data to the archive.
2704 if tarinfo.isreg():
2705- f = __builtin__.file(name, "rb")
2706- self.addfile(tarinfo, fileobj = f)
2707+ f = bltn_open(name, "rb")
2708+ self.addfile(tarinfo, f)
2709 f.close()
2710
2711- if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
2712- tarinfo.size = 0l
2713- self.addfile(tarinfo)
2714-
2715- if tarinfo.isdir():
2716+ elif tarinfo.isdir():
2717 self.addfile(tarinfo)
2718 if recursive:
2719 for f in os.listdir(name):
2720- self.add(os.path.join(name, f), os.path.join(arcname, f))
2721+ self.add(os.path.join(name, f), os.path.join(arcname, f),
2722+ recursive, exclude, filter)
2723+
2724+ else:
2725+ self.addfile(tarinfo)
2726
2727 def addfile(self, tarinfo, fileobj=None):
2728- """Add the content of fileobj to the tarfile.
2729- The amount of bytes to read is determined by
2730- the size attribute in the tarinfo instance.
2731+ """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2732+ given, tarinfo.size bytes are read from it and added to the archive.
2733+ You can create TarInfo objects using gettarinfo().
2734+ On Windows platforms, `fileobj' should always be opened with mode
2735+ 'rb' to avoid irritation about the file size.
2736 """
2737- if not self.fileobj:
2738- raise ValueError, "I/O operation on closed file"
2739- if self._mode == "r":
2740- raise ValueError, "writing to a read-mode file"
2741-
2742- # XXX What was this good for again?
2743- #try:
2744- # self.fileobj.seek(self.chunks[-1])
2745- #except IOError:
2746- # pass
2747-
2748- full_headers = self._get_full_headers(tarinfo)
2749- self.fileobj.write(full_headers)
2750- assert len(full_headers) % BLOCKSIZE == 0
2751- self.offset += len(full_headers)
2752+ self._check("aw")
2753+
2754+ tarinfo = copy.copy(tarinfo)
2755+
2756+ buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2757+ self.fileobj.write(buf)
2758+ self.offset += len(buf)
2759
2760 # If there's data to follow, append it.
2761 if fileobj is not None:
2762 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2763 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2764 if remainder > 0:
2765- self.fileobj.write("\0" * (BLOCKSIZE - remainder))
2766+ self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2767 blocks += 1
2768 self.offset += blocks * BLOCKSIZE
2769
2770- #self.members.append(tarinfo) #These take up too much memory
2771- #self.membernames.append(tarinfo.name)
2772- #self.chunks.append(self.offset)
2773-
2774- def _get_full_headers(self, tarinfo):
2775- """Return string containing headers around tarinfo, including gnulongs
2776- """
2777- buf = ""
2778- # Now we must check if the strings for filename
2779- # and linkname fit into the posix header.
2780- # (99 chars + "\0" for each)
2781- # If not, we must create GNU extension headers.
2782- # If both filename and linkname are too long,
2783- # the longlink is first to be written out.
2784- if len(tarinfo.linkname) >= LENGTH_LINK - 1:
2785- buf += self._return_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
2786- tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
2787- if len(tarinfo.name) >= LENGTH_NAME - 1:
2788- buf += self._return_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
2789- tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
2790- return buf + tarinfo.getheader()
2791-
2792-# def untar(self, path):
2793-# """Untar the whole archive to path.
2794-# """
2795-# later = []
2796-# for tarinfo in self:
2797-# if tarinfo.isdir():
2798-# later.append(tarinfo)
2799-# self.extract(tarinfo, path)
2800-# for tarinfo in later:
2801-# self._utime(tarinfo, os.path.join(path, tarinfo.name))
2802-
2803- def extractfile(self, member):
2804- """Extract member from the archive and return a file-like
2805- object. member may be a name or a TarInfo instance.
2806- """
2807- if not self.fileobj:
2808- raise ValueError, "I/O operation on closed file"
2809- if self._mode != "r":
2810- raise ValueError, "reading from a write-mode file"
2811-
2812- if isinstance(member, TarInfo):
2813- tarinfo = member
2814- else:
2815- tarinfo = self.getmember(member)
2816-
2817- if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2818- return _FileObject(self, tarinfo)
2819- elif tarinfo.islnk() or tarinfo.issym():
2820- return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
2821- else:
2822- return None
2823+ self.members.append(tarinfo)
2824+
2825+ def extractall(self, path=".", members=None):
2826+ """Extract all members from the archive to the current working
2827+ directory and set owner, modification time and permissions on
2828+ directories afterwards. `path' specifies a different directory
2829+ to extract to. `members' is optional and must be a subset of the
2830+ list returned by getmembers().
2831+ """
2832+ directories = []
2833+
2834+ if members is None:
2835+ members = self
2836+
2837+ for tarinfo in members:
2838+ if tarinfo.isdir():
2839+ # Extract directories with a safe mode.
2840+ directories.append(tarinfo)
2841+ tarinfo = copy.copy(tarinfo)
2842+ tarinfo.mode = 0700
2843+ self.extract(tarinfo, path)
2844+
2845+ # Reverse sort directories.
2846+ directories.sort(key=operator.attrgetter('name'))
2847+ directories.reverse()
2848+
2849+ # Set correct owner, mtime and filemode on directories.
2850+ for tarinfo in directories:
2851+ dirpath = os.path.join(path, tarinfo.name)
2852+ try:
2853+ self.chown(tarinfo, dirpath)
2854+ self.utime(tarinfo, dirpath)
2855+ self.chmod(tarinfo, dirpath)
2856+ except ExtractError, e:
2857+ if self.errorlevel > 1:
2858+ raise
2859+ else:
2860+ self._dbg(1, "tarfile: %s" % e)
2861
2862 def extract(self, member, path=""):
2863- """Extract member from the archive and write it to
2864- current working directory using its full pathname.
2865- If optional path is given, it is attached before the
2866- pathname.
2867- member may be a name or a TarInfo instance.
2868+ """Extract a member from the archive to the current working directory,
2869+ using its full name. Its file information is extracted as accurately
2870+ as possible. `member' may be a filename or a TarInfo object. You can
2871+ specify a different directory using `path'.
2872 """
2873- if not self.fileobj:
2874- raise ValueError, "I/O operation on closed file"
2875- if self._mode != "r":
2876- raise ValueError, "reading from a write-mode file"
2877+ self._check("r")
2878
2879- if isinstance(member, TarInfo):
2880+ if isinstance(member, basestring):
2881+ tarinfo = self.getmember(member)
2882+ else:
2883 tarinfo = member
2884- else:
2885- tarinfo = self.getmember(member)
2886-
2887- self._dbg(1, tarinfo.name)
2888+
2889+ # Prepare the link target for makelink().
2890+ if tarinfo.islnk():
2891+ tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2892+
2893 try:
2894 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2895 except EnvironmentError, e:
2896 if self.errorlevel > 0:
2897 raise
2898 else:
2899- self._dbg(1, "\ntarfile: %s `%s'" % (e.strerror, e.filename))
2900- except TarError, e:
2901+ if e.filename is None:
2902+ self._dbg(1, "tarfile: %s" % e.strerror)
2903+ else:
2904+ self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2905+ except ExtractError, e:
2906 if self.errorlevel > 1:
2907 raise
2908 else:
2909- self._dbg(1, "\ntarfile: %s" % e)
2910- self._dbg(1, "\n")
2911+ self._dbg(1, "tarfile: %s" % e)
2912+
2913+ def extractfile(self, member):
2914+ """Extract a member from the archive as a file object. `member' may be
2915+ a filename or a TarInfo object. If `member' is a regular file, a
2916+ file-like object is returned. If `member' is a link, a file-like
2917+ object is constructed from the link's target. If `member' is none of
2918+ the above, None is returned.
2919+ The file-like object is read-only and provides the following
2920+ methods: read(), readline(), readlines(), seek() and tell()
2921+ """
2922+ self._check("r")
2923+
2924+ if isinstance(member, basestring):
2925+ tarinfo = self.getmember(member)
2926+ else:
2927+ tarinfo = member
2928+
2929+ if tarinfo.isreg():
2930+ return self.fileobject(self, tarinfo)
2931+
2932+ elif tarinfo.type not in SUPPORTED_TYPES:
2933+ # If a member's type is unknown, it is treated as a
2934+ # regular file.
2935+ return self.fileobject(self, tarinfo)
2936+
2937+ elif tarinfo.islnk() or tarinfo.issym():
2938+ if isinstance(self.fileobj, _Stream):
2939+ # A small but ugly workaround for the case that someone tries
2940+ # to extract a (sym)link as a file-object from a non-seekable
2941+ # stream of tar blocks.
2942+ raise StreamError("cannot extract (sym)link as file object")
2943+ else:
2944+ # A (sym)link's file object is its target's file object.
2945+ return self.extractfile(self._find_link_target(tarinfo))
2946+ else:
2947+ # If there's no data associated with the member (directory, chrdev,
2948+ # blkdev, etc.), return None instead of a file object.
2949+ return None
2950
2951 def _extract_member(self, tarinfo, targetpath):
2952- """Extract the TarInfo instance tarinfo to a physical
2953+ """Extract the TarInfo object tarinfo to a physical
2954 file called targetpath.
2955 """
2956- # Fetch the TarInfo instance for the given name
2957+ # Fetch the TarInfo object for the given name
2958 # and build the destination pathname, replacing
2959 # forward slashes to platform specific separators.
2960- if targetpath[-1:] == "/":
2961- targetpath = targetpath[:-1]
2962- targetpath = os.path.normpath(targetpath)
2963+ targetpath = targetpath.rstrip("/")
2964+ targetpath = targetpath.replace("/", os.sep)
2965
2966 # Create all upper directories.
2967 upperdirs = os.path.dirname(targetpath)
2968 if upperdirs and not os.path.exists(upperdirs):
2969- ti = TarInfo()
2970- ti.name = ""
2971- ti.type = DIRTYPE
2972- ti.mode = 0777
2973- ti.mtime = tarinfo.mtime
2974- ti.uid = tarinfo.uid
2975- ti.gid = tarinfo.gid
2976- ti.uname = tarinfo.uname
2977- ti.gname = tarinfo.gname
2978- for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
2979- ti.name = os.path.join(ti.name, d)
2980- self._extract_member(ti, ti.name)
2981+ # Create directories that are not part of the archive with
2982+ # default permissions.
2983+ os.makedirs(upperdirs)
2984+
2985+ if tarinfo.islnk() or tarinfo.issym():
2986+ self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2987+ else:
2988+ self._dbg(1, tarinfo.name)
2989
2990 if tarinfo.isreg():
2991- self._makefile(tarinfo, targetpath)
2992+ self.makefile(tarinfo, targetpath)
2993 elif tarinfo.isdir():
2994- self._makedir(tarinfo, targetpath)
2995+ self.makedir(tarinfo, targetpath)
2996 elif tarinfo.isfifo():
2997- self._makefifo(tarinfo, targetpath)
2998+ self.makefifo(tarinfo, targetpath)
2999 elif tarinfo.ischr() or tarinfo.isblk():
3000- self._makedev(tarinfo, targetpath)
3001+ self.makedev(tarinfo, targetpath)
3002 elif tarinfo.islnk() or tarinfo.issym():
3003- self._makelink(tarinfo, targetpath)
3004+ self.makelink(tarinfo, targetpath)
3005+ elif tarinfo.type not in SUPPORTED_TYPES:
3006+ self.makeunknown(tarinfo, targetpath)
3007 else:
3008- self._makefile(tarinfo, targetpath)
3009- if tarinfo.type not in SUPPORTED_TYPES:
3010- self._dbg(1, "\ntarfile: Unknown file type '%s', " \
3011- "extracted as regular file." % tarinfo.type)
3012+ self.makefile(tarinfo, targetpath)
3013
3014+ self.chown(tarinfo, targetpath)
3015 if not tarinfo.issym():
3016- self._chown(tarinfo, targetpath)
3017- self._chmod(tarinfo, targetpath)
3018- if not tarinfo.isdir():
3019- self._utime(tarinfo, targetpath)
3020-
3021- def _makedir(self, tarinfo, targetpath):
3022- """Make a directory called targetpath out of tarinfo.
3023+ self.chmod(tarinfo, targetpath)
3024+ self.utime(tarinfo, targetpath)
3025+
3026+ #--------------------------------------------------------------------------
3027+ # Below are the different file methods. They are called via
3028+ # _extract_member() when extract() is called. They can be replaced in a
3029+ # subclass to implement other functionality.
3030+
3031+ def makedir(self, tarinfo, targetpath):
3032+ """Make a directory called targetpath.
3033 """
3034 try:
3035- os.mkdir(targetpath)
3036+ # Use a safe mode for the directory, the real mode is set
3037+ # later in _extract_member().
3038+ os.mkdir(targetpath, 0700)
3039 except EnvironmentError, e:
3040 if e.errno != errno.EEXIST:
3041 raise
3042
3043- def _makefile(self, tarinfo, targetpath):
3044- """Make a file called targetpath out of tarinfo.
3045+ def makefile(self, tarinfo, targetpath):
3046+ """Make a file called targetpath.
3047 """
3048 source = self.extractfile(tarinfo)
3049- target = __builtin__.file(targetpath, "wb")
3050+ target = bltn_open(targetpath, "wb")
3051 copyfileobj(source, target)
3052 source.close()
3053 target.close()
3054
3055- def _makefifo(self, tarinfo, targetpath):
3056- """Make a fifo called targetpath out of tarinfo.
3057+ def makeunknown(self, tarinfo, targetpath):
3058+ """Make a file from a TarInfo object with an unknown type
3059+ at targetpath.
3060+ """
3061+ self.makefile(tarinfo, targetpath)
3062+ self._dbg(1, "tarfile: Unknown file type %r, " \
3063+ "extracted as regular file." % tarinfo.type)
3064+
3065+ def makefifo(self, tarinfo, targetpath):
3066+ """Make a fifo called targetpath.
3067 """
3068 if hasattr(os, "mkfifo"):
3069 os.mkfifo(targetpath)
3070 else:
3071- raise TarError, "Fifo not supported by system"
3072+ raise ExtractError("fifo not supported by system")
3073
3074- def _makedev(self, tarinfo, targetpath):
3075- """Make a character or block device called targetpath out of tarinfo.
3076+ def makedev(self, tarinfo, targetpath):
3077+ """Make a character or block device called targetpath.
3078 """
3079- if not hasattr(os, "mknod"):
3080- raise TarError, "Special devices not supported by system"
3081+ if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
3082+ raise ExtractError("special devices not supported by system")
3083
3084 mode = tarinfo.mode
3085 if tarinfo.isblk():
3086@@ -948,109 +2234,187 @@
3087 else:
3088 mode |= stat.S_IFCHR
3089
3090- # This if statement should go away when python-2.3a0-devicemacros
3091- # patch succeeds.
3092- if hasattr(os, "makedev"):
3093- os.mknod(targetpath, mode,
3094- os.makedev(tarinfo.devmajor, tarinfo.devminor))
3095- else:
3096- os.mknod(targetpath, mode,
3097- tarinfo.devmajor, tarinfo.devminor)
3098+ os.mknod(targetpath, mode,
3099+ os.makedev(tarinfo.devmajor, tarinfo.devminor))
3100
3101- def _makelink(self, tarinfo, targetpath):
3102- """Make a (symbolic) link called targetpath out of tarinfo.
3103- If it cannot be made (due to platform or failure), we try
3104- to make a copy of the referenced file instead of a link.
3105+ def makelink(self, tarinfo, targetpath):
3106+ """Make a (symbolic) link called targetpath. If it cannot be created
3107+ (platform limitation), we try to make a copy of the referenced file
3108+ instead of a link.
3109 """
3110- linkpath = tarinfo.linkname
3111- self._dbg(1, " -> %s" % linkpath)
3112- try:
3113+ if hasattr(os, "symlink") and hasattr(os, "link"):
3114+ # For systems that support symbolic and hard links.
3115 if tarinfo.issym():
3116- os.symlink(linkpath, targetpath)
3117+ os.symlink(tarinfo.linkname, targetpath)
3118 else:
3119- linkpath = os.path.join(os.path.dirname(targetpath),
3120- linkpath)
3121- os.link(linkpath, targetpath)
3122- except AttributeError:
3123- linkpath = os.path.join(os.path.dirname(tarinfo.name),
3124- tarinfo.linkname)
3125- linkpath = normpath(linkpath)
3126+ # See extract().
3127+ if os.path.exists(tarinfo._link_target):
3128+ os.link(tarinfo._link_target, targetpath)
3129+ else:
3130+ self._extract_member(self._find_link_target(tarinfo), targetpath)
3131+ else:
3132 try:
3133- self._extract_member(self.getmember(linkpath), targetpath)
3134- except (IOError, OSError, KeyError), e: #@UnusedVariable
3135- linkpath = os.path.normpath(linkpath)
3136- try:
3137- shutil.copy2(linkpath, targetpath)
3138- except EnvironmentError, e: #@UnusedVariable
3139- raise TarError, "Link could not be created"
3140+ self._extract_member(self._find_link_target(tarinfo), targetpath)
3141+ except KeyError:
3142+ raise ExtractError("unable to resolve link inside archive")
3143
3144- def _chown(self, tarinfo, targetpath):
3145+ def chown(self, tarinfo, targetpath):
3146 """Set owner of targetpath according to tarinfo.
3147 """
3148- if pwd and os.geteuid() == 0:
3149+ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
3150 # We have to be root to do so.
3151- try: g = gname2gid(tarinfo.gname)
3152- except KeyError:
3153- try:
3154- gid2gname(tarinfo.gid) # Make sure gid exists
3155- g = tarinfo.gid
3156- except KeyError: g = os.getgid()
3157- try: u = uname2uid(tarinfo.uname)
3158- except KeyError:
3159- try:
3160- uid2uname(tarinfo.uid) # Make sure uid exists
3161- u = tarinfo.uid
3162- except KeyError: u = os.getuid()
3163+ try:
3164+ g = grp.getgrnam(tarinfo.gname)[2]
3165+ except KeyError:
3166+ try:
3167+ g = grp.getgrgid(tarinfo.gid)[2]
3168+ except KeyError:
3169+ g = os.getgid()
3170+ try:
3171+ u = pwd.getpwnam(tarinfo.uname)[2]
3172+ except KeyError:
3173+ try:
3174+ u = pwd.getpwuid(tarinfo.uid)[2]
3175+ except KeyError:
3176+ u = os.getuid()
3177 try:
3178 if tarinfo.issym() and hasattr(os, "lchown"):
3179 os.lchown(targetpath, u, g)
3180 else:
3181- os.chown(targetpath, u, g)
3182+ if sys.platform != "os2emx":
3183+ os.chown(targetpath, u, g)
3184 except EnvironmentError, e:
3185- self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
3186- % (e.strerror, e.filename))
3187+ raise ExtractError("could not change owner")
3188
3189- def _chmod(self, tarinfo, targetpath):
3190+ def chmod(self, tarinfo, targetpath):
3191 """Set file permissions of targetpath according to tarinfo.
3192 """
3193- try:
3194- os.chmod(targetpath, tarinfo.mode)
3195- except EnvironmentError, e:
3196- self._dbg(2, "\ntarfile: (chmod failed), %s `%s'"
3197- % (e.strerror, e.filename))
3198+ if hasattr(os, 'chmod'):
3199+ try:
3200+ os.chmod(targetpath, tarinfo.mode)
3201+ except EnvironmentError, e:
3202+ raise ExtractError("could not change mode")
3203
3204- def _utime(self, tarinfo, targetpath):
3205+ def utime(self, tarinfo, targetpath):
3206 """Set modification time of targetpath according to tarinfo.
3207 """
3208+ if not hasattr(os, 'utime'):
3209+ return
3210 try:
3211 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
3212 except EnvironmentError, e:
3213- self._dbg(2, "\ntarfile: (utime failed), %s `%s'"
3214- % (e.strerror, e.filename))
3215-
3216- def _getmember(self, name, tarinfo=None):
3217+ raise ExtractError("could not change modification time")
3218+
3219+ #--------------------------------------------------------------------------
3220+ def next(self):
3221+ """Return the next member of the archive as a TarInfo object, when
3222+ TarFile is opened for reading. Return None if there is no more
3223+ available.
3224+ """
3225+ self._check("ra")
3226+ if self.firstmember is not None:
3227+ m = self.firstmember
3228+ self.firstmember = None
3229+ return m
3230+
3231+ # Read the next block.
3232+ self.fileobj.seek(self.offset)
3233+ tarinfo = None
3234+ while True:
3235+ try:
3236+ tarinfo = self.tarinfo.fromtarfile(self)
3237+ except EOFHeaderError, e:
3238+ if self.ignore_zeros:
3239+ self._dbg(2, "0x%X: %s" % (self.offset, e))
3240+ self.offset += BLOCKSIZE
3241+ continue
3242+ except InvalidHeaderError, e:
3243+ if self.ignore_zeros:
3244+ self._dbg(2, "0x%X: %s" % (self.offset, e))
3245+ self.offset += BLOCKSIZE
3246+ continue
3247+ elif self.offset == 0:
3248+ raise ReadError(str(e))
3249+ except EmptyHeaderError:
3250+ if self.offset == 0:
3251+ raise ReadError("empty file")
3252+ except TruncatedHeaderError, e:
3253+ if self.offset == 0:
3254+ raise ReadError(str(e))
3255+ except SubsequentHeaderError, e:
3256+ raise ReadError(str(e))
3257+ break
3258+
3259+ if tarinfo is not None:
3260+ self.members.append(tarinfo)
3261+ else:
3262+ self._loaded = True
3263+
3264+ return tarinfo
3265+
3266+ #--------------------------------------------------------------------------
3267+ # Little helper methods:
3268+
3269+ def _getmember(self, name, tarinfo=None, normalize=False):
3270 """Find an archive member by name from bottom to top.
3271 If tarinfo is given, it is used as the starting point.
3272 """
3273- if tarinfo is None:
3274- end = len(self.members)
3275- else:
3276- end = self.members.index(tarinfo)
3277-
3278- for i in xrange(end - 1, -1, -1):
3279- if name == self.membernames[i]:
3280- return self.members[i]
3281+ # Ensure that all members have been loaded.
3282+ members = self.getmembers()
3283+
3284+ # Limit the member search list up to tarinfo.
3285+ if tarinfo is not None:
3286+ members = members[:members.index(tarinfo)]
3287+
3288+ if normalize:
3289+ name = os.path.normpath(name)
3290+
3291+ for member in reversed(members):
3292+ if normalize:
3293+ member_name = os.path.normpath(member.name)
3294+ else:
3295+ member_name = member.name
3296+
3297+ if name == member_name:
3298+ return member
3299
3300 def _load(self):
3301 """Read through the entire archive file and look for readable
3302 members.
3303 """
3304- while 1:
3305+ while True:
3306 tarinfo = self.next()
3307 if tarinfo is None:
3308 break
3309- self._loaded = 1
3310- return
3311+ self._loaded = True
3312+
3313+ def _check(self, mode=None):
3314+ """Check if TarFile is still open, and if the operation's mode
3315+ corresponds to TarFile's mode.
3316+ """
3317+ if self.closed:
3318+ raise IOError("%s is closed" % self.__class__.__name__)
3319+ if mode is not None and self.mode not in mode:
3320+ raise IOError("bad operation for mode %r" % self.mode)
3321+
3322+ def _find_link_target(self, tarinfo):
3323+ """Find the target member of a symlink or hardlink member in the
3324+ archive.
3325+ """
3326+ if tarinfo.issym():
3327+ # Always search the entire archive.
3328+ linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
3329+ limit = None
3330+ else:
3331+ # Search the archive before the link, because a hard link is
3332+ # just a reference to an already archived file.
3333+ linkname = tarinfo.linkname
3334+ limit = tarinfo
3335+
3336+ member = self._getmember(linkname, tarinfo=limit, normalize=True)
3337+ if member is None:
3338+ raise KeyError("linkname %r not found" % linkname)
3339+ return member
3340
3341 def __iter__(self):
3342 """Provide an iterator object.
3343@@ -1060,136 +2424,25 @@
3344 else:
3345 return TarIter(self)
3346
3347- def _buftoinfo(self, buf):
3348- """Transform a 512 byte block to a TarInfo instance.
3349- """
3350- tarinfo = TarInfo()
3351- tarinfo.name = nts(buf[0:100])
3352- tarinfo.mode = int(buf[100:107], 8)
3353- tarinfo.uid = int(buf[108:115],8)
3354- tarinfo.gid = int(buf[116:123],8)
3355- tarinfo.size = long(buf[124:135], 8)
3356- tarinfo.mtime = long(buf[136:147], 8)
3357- # chksum stored as a six digit octal number with
3358- # leading zeroes followed by a nul and then a space
3359- tarinfo.chksum = int(buf[148:154], 8)
3360- tarinfo.type = buf[156:157]
3361- tarinfo.linkname = nts(buf[157:257])
3362- tarinfo.uname = nts(buf[265:297])
3363- tarinfo.gname = nts(buf[297:329])
3364- try:
3365- tarinfo.devmajor = int(buf[329:336], 8)
3366- tarinfo.devminor = int(buf[337:344], 8)
3367- except ValueError:
3368- tarinfo.devmajor = tarinfo.devmajor = 0
3369- tarinfo.prefix = buf[345:500]
3370- if tarinfo.chksum != calc_chksum(buf):
3371- self._dbg(1, "tarfile: Bad Checksum\n")
3372- return tarinfo
3373-
3374- def _proc_gnulong(self, tarinfo, type):
3375- """Evaluate the blocks that hold a GNU longname
3376- or longlink member.
3377- """
3378- name = None
3379- linkname = None
3380- #may be some sanity checking should be done here
3381- #assert tarinfo.size < 1000 * BLOCKSIZE, "Filename appears to be too long!"
3382- buf = self.fileobj.read(BLOCKSIZE)
3383- if not buf: return None
3384- namesize = tarinfo.size - BLOCKSIZE
3385- self.offset += BLOCKSIZE
3386- # may be the whole name should be read with one operation?
3387- while namesize > 0:
3388- buf += self.fileobj.read(BLOCKSIZE)
3389- if not buf: return None
3390- self.offset += BLOCKSIZE
3391- namesize -= BLOCKSIZE
3392- if type == GNUTYPE_LONGNAME: name = nts(buf)
3393- if type == GNUTYPE_LONGLINK: linkname = nts(buf)
3394-
3395- buf = self.fileobj.read(BLOCKSIZE)
3396- if not buf: return None
3397- tarinfo = self._buftoinfo(buf)
3398- if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
3399- tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
3400- if not tarinfo: return None
3401- if name is not None:
3402- tarinfo.name = name
3403- if linkname is not None:
3404- tarinfo.linkname = linkname
3405- self.offset += BLOCKSIZE
3406- return tarinfo
3407-
3408-
3409-
3410- def _return_gnulong(self, name, type):
3411- """Insert a GNU longname/longlink member into the archive.
3412- It consists of a common tar header, with the length
3413- of the longname as size, followed by a data block,
3414- which contains the longname as a null terminated string.
3415- """
3416- tarinfo = TarInfo()
3417- tarinfo.name = "././@LongLink"
3418- tarinfo.type = type
3419- tarinfo.mode = 0
3420- tarinfo.size = len(name)
3421-
3422- residual = (tarinfo.size % BLOCKSIZE)
3423- return "%s%s%s" % (tarinfo.getheader(), name,
3424- "\0" * ((BLOCKSIZE - residual) * (residual > 0)))
3425-
3426- def _proc_sparse(self, tarinfo):
3427- """Analyze a GNU sparse header plus extra headers.
3428- """
3429- buf = tarinfo.getheader()
3430- sp = _ringbuffer()
3431- pos = 386
3432- lastpos = 0l
3433- realpos = 0l
3434- try:
3435- # There are 4 possible sparse structs in the
3436- # first header.
3437- for i in range(4): #@UnusedVariable
3438- offset = int(buf[pos:pos + 12], 8)
3439- numbytes = int(buf[pos + 12:pos + 24], 8)
3440- if offset > lastpos:
3441- sp.append(_hole(lastpos, offset - lastpos))
3442- sp.append(_data(offset, numbytes, realpos))
3443- realpos += numbytes
3444- lastpos = offset + numbytes
3445- pos += 24
3446-
3447- isextended = ord(buf[482])
3448- origsize = int(buf[483:495], 8)
3449-
3450- # If the isextended flag is given,
3451- # there are extra headers to process.
3452- while isextended == 1:
3453- buf = self.fileobj.read(BLOCKSIZE)
3454- self.offset += BLOCKSIZE
3455- pos = 0
3456- for i in range(21): #@UnusedVariable
3457- offset = int(buf[pos:pos + 12], 8)
3458- numbytes = int(buf[pos + 12:pos + 24], 8)
3459- if offset > lastpos:
3460- sp.append(_hole(lastpos, offset - lastpos))
3461- sp.append(_data(offset, numbytes, realpos))
3462- realpos += numbytes
3463- lastpos = offset + numbytes
3464- pos += 24
3465- isextended = ord(buf[504])
3466- except ValueError:
3467- pass
3468- if lastpos < origsize:
3469- sp.append(_hole(lastpos, origsize - lastpos))
3470-
3471- tarinfo.sparse = sp
3472- return origsize
3473-
3474 def _dbg(self, level, msg):
3475+ """Write debugging output to sys.stderr.
3476+ """
3477 if level <= self.debug:
3478- sys.stdout.write(msg)
3479+ print >> sys.stderr, msg
3480+
3481+ def __enter__(self):
3482+ self._check()
3483+ return self
3484+
3485+ def __exit__(self, type, value, traceback):
3486+ if type is None:
3487+ self.close()
3488+ else:
3489+ # An exception occurred. We must not call close() because
3490+ # it would try to write end-of-archive blocks and padding.
3491+ if not self._extfileobj:
3492+ self.fileobj.close()
3493+ self.closed = True
3494 # class TarFile
3495
3496 class TarIter:
3497@@ -1200,9 +2453,10 @@
3498 """
3499
3500 def __init__(self, tarfile):
3501- """Construct a TarIter instance.
3502+ """Construct a TarIter object.
3503 """
3504 self.tarfile = tarfile
3505+ self.index = 0
3506 def __iter__(self):
3507 """Return iterator object.
3508 """
3509@@ -1211,12 +2465,21 @@
3510 """Return the next item using TarFile's next() method.
3511 When all members have been read, set TarFile as _loaded.
3512 """
3513- tarinfo = self.tarfile.next()
3514- if not tarinfo:
3515- self.tarfile._loaded = 1
3516- raise StopIteration
3517+ # Fix for SF #1100429: Under rare circumstances it can
3518+ # happen that getmembers() is called during iteration,
3519+ # which will cause TarIter to stop prematurely.
3520+ if not self.tarfile._loaded:
3521+ tarinfo = self.tarfile.next()
3522+ if not tarinfo:
3523+ self.tarfile._loaded = True
3524+ raise StopIteration
3525+ else:
3526+ try:
3527+ tarinfo = self.tarfile.members[self.index]
3528+ except IndexError:
3529+ raise StopIteration
3530+ self.index += 1
3531 return tarinfo
3532-# class TarIter
3533
3534 # Helper classes for sparse file support
3535 class _section:
3536@@ -1248,7 +2511,7 @@
3537 self.idx = 0
3538 def find(self, offset):
3539 idx = self.idx
3540- while 1:
3541+ while True:
3542 item = self[idx]
3543 if offset in item:
3544 break
3545@@ -1261,153 +2524,9 @@
3546 self.idx = idx
3547 return item
3548
3549-class _FileObject:
3550- """File-like object for reading an archive member,
3551- is returned by TarFile.extractfile().
3552- Support for sparse files included.
3553- """
3554-
3555- def __init__(self, tarfile, tarinfo):
3556- self.tarfile = tarfile
3557- self.fileobj = tarfile.fileobj
3558- self.name = tarinfo.name
3559- self.mode = "r"
3560- self.closed = 0
3561- self.offset = tarinfo.offset_data
3562- self.size = tarinfo.size
3563- self.pos = 0l
3564- self.linebuffer = ""
3565- if tarinfo.issparse():
3566- self.sparse = tarinfo.sparse
3567- self.read = self._readsparse
3568- else:
3569- self.read = self._readnormal
3570-
3571- def readline(self, size=-1):
3572- """Read a line with approx. size.
3573- If size is negative, read a whole line.
3574- readline() and read() must not be mixed up (!).
3575- """
3576- if size < 0:
3577- size = sys.maxint
3578-
3579- nl = self.linebuffer.find("\n")
3580- if nl >= 0:
3581- nl = min(nl, size)
3582- else:
3583- size -= len(self.linebuffer)
3584- while nl < 0:
3585- buf = self.read(min(size, 100))
3586- if not buf:
3587- break
3588- self.linebuffer += buf
3589- size -= len(buf)
3590- if size <= 0:
3591- break
3592- nl = self.linebuffer.find("\n")
3593- if nl == -1:
3594- s = self.linebuffer
3595- self.linebuffer = ""
3596- return s
3597- buf = self.linebuffer[:nl]
3598- self.linebuffer = self.linebuffer[nl + 1:]
3599- while buf[-1:] == "\r":
3600- buf = buf[:-1]
3601- return buf + "\n"
3602-
3603- def readlines(self):
3604- """Return a list with all (following) lines.
3605- """
3606- result = []
3607- while 1:
3608- line = self.readline()
3609- if not line: break
3610- result.append(line)
3611- return result
3612-
3613- def _readnormal(self, size=None):
3614- """Read operation for regular files.
3615- """
3616- if self.closed:
3617- raise ValueError, "I/O operation on closed file"
3618- #self.fileobj.seek(self.offset + self.pos)
3619- bytesleft = self.size - self.pos
3620- if size is None:
3621- bytestoread = bytesleft
3622- else:
3623- bytestoread = min(size, bytesleft)
3624- self.pos += bytestoread
3625- self.tarfile.offset += bytestoread
3626- return self.fileobj.read(bytestoread)
3627-
3628- def _readsparse(self, size=None):
3629- """Read operation for sparse files.
3630- """
3631- if self.closed:
3632- raise ValueError, "I/O operation on closed file"
3633-
3634- if size is None:
3635- size = self.size - self.pos
3636-
3637- data = ""
3638- while size > 0:
3639- buf = self._readsparsesection(size)
3640- if not buf:
3641- break
3642- size -= len(buf)
3643- data += buf
3644- return data
3645-
3646- def _readsparsesection(self, size):
3647- """Read a single section of a sparse file.
3648- """
3649- section = self.sparse.find(self.pos)
3650-
3651- if section is None:
3652- return ""
3653-
3654- toread = min(size, section.offset + section.size - self.pos)
3655- if isinstance(section, _data):
3656- realpos = section.realpos + self.pos - section.offset
3657- self.pos += toread
3658- self.fileobj.seek(self.offset + realpos)
3659- return self.fileobj.read(toread)
3660- else:
3661- self.pos += toread
3662- return "\0" * toread
3663-
3664- def tell(self):
3665- """Return the current file position.
3666- """
3667- return self.pos
3668-
3669- def seek(self, pos, whence=0):
3670- """Seek to a position in the file.
3671- """
3672- self.linebuffer = ""
3673- if whence == 0:
3674- self.pos = min(max(pos, 0), self.size)
3675- if whence == 1:
3676- if pos < 0:
3677- self.pos = max(self.pos + pos, 0)
3678- else:
3679- self.pos = min(self.pos + pos, self.size)
3680- if whence == 2:
3681- self.pos = max(min(self.size + pos, self.size), 0)
3682-
3683- def close(self):
3684- """Close the file object.
3685- """
3686- self.closed = 1
3687-#class _FileObject
3688-
3689 #---------------------------------------------
3690 # zipfile compatible TarFile class
3691-#
3692-# for details consult zipfile's documentation
3693 #---------------------------------------------
3694-import cStringIO
3695-
3696 TAR_PLAIN = 0 # zipfile.ZIP_STORED
3697 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
3698 class TarFileCompat:
3699@@ -1415,16 +2534,18 @@
3700 ZipFile class.
3701 """
3702 def __init__(self, file, mode="r", compression=TAR_PLAIN):
3703+ from warnings import warnpy3k
3704+ warnpy3k("the TarFileCompat class has been removed in Python 3.0",
3705+ stacklevel=2)
3706 if compression == TAR_PLAIN:
3707- self.tarfile = open(file, mode)
3708+ self.tarfile = TarFile.taropen(file, mode)
3709 elif compression == TAR_GZIPPED:
3710- self.tarfile = gzopen(file, mode)
3711+ self.tarfile = TarFile.gzopen(file, mode)
3712 else:
3713- raise ValueError, "unknown compression constant"
3714+ raise ValueError("unknown compression constant")
3715 if mode[0:1] == "r":
3716 members = self.tarfile.getmembers()
3717- for i in range(len(members)):
3718- m = members[i]
3719+ for m in members:
3720 m.filename = m.name
3721 m.file_size = m.size
3722 m.date_time = time.gmtime(m.mtime)[:6]
3723@@ -1444,270 +2565,32 @@
3724 def write(self, filename, arcname=None, compress_type=None):
3725 self.tarfile.add(filename, arcname)
3726 def writestr(self, zinfo, bytes):
3727+ try:
3728+ from cStringIO import StringIO
3729+ except ImportError:
3730+ from StringIO import StringIO
3731 import calendar
3732- zinfo.name = zinfo.filename
3733- zinfo.size = zinfo.file_size
3734- zinfo.mtime = calendar.timegm(zinfo.date_time)
3735- self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))
3736+ tinfo = TarInfo(zinfo.filename)
3737+ tinfo.size = len(bytes)
3738+ tinfo.mtime = calendar.timegm(zinfo.date_time)
3739+ self.tarfile.addfile(tinfo, StringIO(bytes))
3740 def close(self):
3741 self.tarfile.close()
3742 #class TarFileCompat
3743
3744-if __name__ == "__main__":
3745- # a "light-weight" implementation of GNUtar ;-)
3746- usage = """
3747-Usage: %s [options] [files]
3748-
3749--h display this help message
3750--c create a tarfile
3751--r append to an existing archive
3752--x extract archive
3753--t list archive contents
3754--f FILENAME
3755- use archive FILENAME, else STDOUT (-c)
3756--z filter archive through gzip
3757--C DIRNAME
3758- with opt -x: extract to directory DIRNAME
3759- with opt -c, -r: put files to archive under DIRNAME
3760--v verbose output
3761--q quiet
3762-
3763-wildcards *, ?, [seq], [!seq] are accepted.
3764- """ % sys.argv[0]
3765-
3766- import getopt, glob
3767+#--------------------
3768+# exported functions
3769+#--------------------
3770+def is_tarfile(name):
3771+ """Return True if name points to a tar archive that we
3772+ are able to handle, else return False.
3773+ """
3774 try:
3775- opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
3776- except getopt.GetoptError, e:
3777- print
3778- print "ERROR:", e
3779- print usage
3780- sys.exit(0)
3781-
3782- file = None
3783- mode = None
3784- dir = None
3785- comp = 0
3786- debug = 0
3787- for o, a in opts:
3788- if o == "-t": mode = "l" # list archive
3789- if o == "-c": mode = "w" # write to archive
3790- if o == "-r": mode = "a" # append to archive
3791- if o == "-x": mode = "r" # extract from archive
3792- if o == "-f": file = a # specify filename else use stdout
3793- if o == "-C": dir = a # change to dir
3794- if o == "-z": comp = 1 # filter through gzip
3795- if o == "-v": debug = 2 # verbose mode
3796- if o == "-q": debug = 0 # quiet mode
3797- if o == "-h": # help message
3798- print usage
3799- sys.exit(0)
3800-
3801- if not mode:
3802- print usage
3803- sys.exit(0)
3804-
3805- if comp:
3806- func = gzopen
3807- else:
3808- func = open
3809-
3810- if not file or file == "-":
3811- if mode != "w":
3812- print usage
3813- sys.exit(0)
3814- debug = 0
3815- # If under Win32, set stdout to binary.
3816- try:
3817- import msvcrt
3818- msvcrt.setmode(1, os.O_BINARY) #@UndefinedVariable
3819- except ImportError:
3820- pass
3821- tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
3822- else:
3823- if mode == "l":
3824- tarfile = func(file, "r")
3825- else:
3826- tarfile = func(file, mode)
3827-
3828- tarfile.debug = debug
3829-
3830- if mode == "r":
3831- if dir is None:
3832- dir = ""
3833- for tarinfo in tarfile:
3834- tarfile.extract(tarinfo, dir)
3835- elif mode == "l":
3836- tarfile.list(debug)
3837- else:
3838- for arg in args:
3839- files = glob.glob(arg)
3840- for f in files:
3841- tarfile.add(f, dir)
3842- tarfile.close()
3843-
3844-
3845-class TarFromIterator(TarFile):
3846- """Readable tarfile-like object generated from iterator
3847- """
3848- # These various status numbers indicate what we are in the process
3849- # of doing in the tarfile.
3850- BEGIN = 0 # next step is to read tarinfo, write new header
3851- MIDDLE_OF_FILE = 1 # in process of writing file data
3852- END = 2 # end of data
3853-
3854- # Buffer is added to in multiples of following
3855- BUFFER_ADDLEN = 64 * 1024
3856-
3857- def __init__(self, pair_iter):
3858- """Construct a TarFromIterator instance. pair_iter is an
3859- iterator of (TarInfo, fileobj) objects, which fileobj should
3860- be a file-like object opened for reading, or None. The
3861- fileobjs will be closed before the next element in the
3862- iterator is read.
3863- """
3864- self.closed = None
3865- self.name = None
3866- self.mode = "rb"
3867- self.pair_iter = pair_iter
3868-
3869- self.init_datastructures()
3870- self.status = self.BEGIN
3871- self.cur_tarinfo, self.cur_fileobj = None, None
3872- self.cur_pos_in_fileobj = 0
3873- self.buffer = ""
3874- # holds current position as seen by reading client. This is
3875- # distinct from self.offset.
3876- self.tar_iter_offset = 0
3877-
3878- def seek(self, offset):
3879- """Seek to current position. Just read and discard some amount"""
3880- if offset < self.tar_iter_offset:
3881- raise TarError("Seeks in TarFromIterator must go forwards,\n"
3882- "Instead asking for %s from %s" %
3883- (offset, self.tar_iter_offset))
3884- while offset - self.tar_iter_offset >= self.BUFFER_ADDLEN:
3885- buf = self.read(self.BUFFER_ADDLEN)
3886- if not buf: return # eof
3887- self.read(offset - self.tar_iter_offset)
3888-
3889- def read(self, length = -1):
3890- """Return next length bytes, or everything if length < 0"""
3891- if length < 0:
3892- while 1:
3893- if not self._addtobuffer(): break
3894- result = self.buffer
3895- self.buffer = ""
3896- else:
3897- while len(self.buffer) < length:
3898- if not self._addtobuffer(): break
3899- # It's possible that length > len(self.buffer)
3900- result = self.buffer[:length]
3901- self.buffer = self.buffer[length:]
3902- self.tar_iter_offset += len(result)
3903- return result
3904-
3905- def _addtobuffer(self):
3906- """Write more data into the buffer. Return None if at end"""
3907- if self.status == self.BEGIN:
3908- # Just write headers into buffer
3909- try: self.cur_tarinfo, self.cur_fileobj = self.pair_iter.next()
3910- except StopIteration:
3911- self._add_final()
3912- self.status = self.END
3913- return None
3914-
3915- # Zero out tarinfo sizes for various file types
3916- if self.cur_tarinfo.type in (LNKTYPE, SYMTYPE,
3917- FIFOTYPE, CHRTYPE, BLKTYPE):
3918- self.cur_tarinfo.size = 0l
3919-
3920- full_headers = self._get_full_headers(self.cur_tarinfo)
3921- self.buffer += full_headers
3922- self.offset += len(full_headers)
3923- assert len(full_headers) % BLOCKSIZE == 0
3924-
3925- if self.cur_fileobj is None: # no data with header
3926- self.status = self.BEGIN
3927- self._finish_fileobj()
3928- else:
3929- self.status = self.MIDDLE_OF_FILE
3930- self.cur_pos_in_fileobj = 0
3931- return 1
3932- elif self.status == self.MIDDLE_OF_FILE:
3933- # Add next chunk of self.cur_fileobj to self.buffer
3934- l = min(self.BUFFER_ADDLEN,
3935- self.cur_tarinfo.size - self.cur_pos_in_fileobj)
3936- s = self.cur_fileobj.read(l)
3937- self.cur_pos_in_fileobj += len(s)
3938- if len(s) == 0:
3939- if l != 0: raise IOError, "end of file reached"
3940- blocks, remainder = divmod(self.cur_tarinfo.size, BLOCKSIZE)
3941- if remainder > 0:
3942- self.buffer += "\0" * (BLOCKSIZE - remainder)
3943- blocks += 1
3944- self.cur_fileobj.close()
3945- self.offset += blocks * BLOCKSIZE
3946- self._finish_fileobj()
3947- self.status = self.BEGIN
3948- else: self.buffer += s
3949- return 1
3950- elif self.status == self.END: return None
3951- assert 0
3952-
3953- def _finish_fileobj(self):
3954- """Update some variables when done writing fileobj"""
3955- return # Skip saving tarinfo information to save memory
3956- self.members.append(self.cur_tarinfo)
3957- self.membernames.append(self.cur_tarinfo.name)
3958- self.chunks.append(self.offset)
3959-
3960- def _add_final(self):
3961- """Add closing footer to buffer"""
3962- blocks, remainder = divmod(self.offset, RECORDSIZE) #@UnusedVariable
3963- if remainder > 0: self.buffer += "\0" * (RECORDSIZE - remainder)
3964-
3965- def close(self):
3966- """Close file obj"""
3967- assert not self.closed
3968- self.closed = 1
3969-
3970-
3971-def uid2uname(uid):
3972- """Return uname of uid, or raise KeyError if none"""
3973- if uid_dict is None: set_pwd_dict()
3974- return uid_dict[uid]
3975-
3976-def uname2uid(uname):
3977- """Return uid of given uname, or raise KeyError if none"""
3978- if uname_dict is None: set_pwd_dict()
3979- return uname_dict[uname]
3980-
3981-def set_pwd_dict():
3982- """Set global pwd caching dictionaries uid_dict and uname_dict"""
3983- global uid_dict, uname_dict
3984- assert uid_dict is None and uname_dict is None and pwd
3985- uid_dict = {}; uname_dict = {}
3986- for entry in pwd.getpwall():
3987- uname = entry[0]; uid = entry[2]
3988- uid_dict[uid] = uname
3989- uname_dict[uname] = uid
3990-
3991-def gid2gname(gid):
3992- """Return group name of gid, or raise KeyError if none"""
3993- if gid_dict is None: set_grp_dict()
3994- return gid_dict[gid]
3995-
3996-def gname2gid(gname):
3997- """Return gid of given group name, or raise KeyError if none"""
3998- if gname_dict is None: set_grp_dict()
3999- return gname_dict[gname]
4000-
4001-def set_grp_dict():
4002- global gid_dict, gname_dict
4003- assert gid_dict is None and gname_dict is None and grp
4004- gid_dict = {}; gname_dict = {}
4005- for entry in grp.getgrall():
4006- gname = entry[0]; gid = entry[2]
4007- gid_dict[gid] = gname
4008- gname_dict[gname] = gid
4009+ t = open(name)
4010+ t.close()
4011+ return True
4012+ except TarError:
4013+ return False
4014+
4015+bltn_open = open
4016+open = TarFile.open
4017
4018=== modified file 'duplicity/util.py'
4019--- duplicity/util.py 2011-08-18 19:17:55 +0000
4020+++ duplicity/util.py 2011-08-23 18:28:26 +0000
4021@@ -28,6 +28,8 @@
4022 import string
4023 import traceback
4024
4025+from duplicity import tarfile
4026+
4027 import duplicity.globals as globals
4028 import duplicity.log as log
4029
4030@@ -69,6 +71,31 @@
4031 else:
4032 raise
4033
4034+class FakeTarFile:
4035+ debug = 0
4036+ def __iter__(self):
4037+ return iter([])
4038+ def close(self):
4039+ pass
4040+
4041+def make_tarfile(mode, fp):
4042+ # We often use 'empty' tarfiles for signatures that haven't been filled out
4043+ # yet. So we want to ignore ReadError exceptions, which are used to signal
4044+ # this.
4045+ try:
4046+ return tarfile.TarFile("arbitrary", mode, fp)
4047+ except tarfile.ReadError:
4048+ return FakeTarFile()
4049+
4050+def get_tarinfo_name(ti):
4051+ # Python versions before 2.6 ensure that directories end with /, but 2.6
4052+ # and later ensure that they *don't* have /. ::shrug:: Internally, we
4053+ # continue to use pre-2.6 method.
4054+ if ti.isdir() and not ti.name.endswith("/"):
4055+ return ti.name + "/"
4056+ else:
4057+ return ti.name
4058+
4059 def ignore_missing(fn, filename):
4060 """
4061 Execute fn on filename. Ignore ENOENT errors, otherwise raise exception.
4062
4063=== modified file 'rdiffdir'
4064--- rdiffdir 2010-11-20 15:39:00 +0000
4065+++ rdiffdir 2011-08-23 18:28:26 +0000
4066@@ -33,7 +33,6 @@
4067 from duplicity import diffdir
4068 from duplicity import patchdir
4069 from duplicity import log
4070-from duplicity import tarfile
4071 from duplicity import globals
4072 from duplicity import selection
4073 from duplicity import path
4074
4075=== added file 'tarfile-CHANGES'
4076--- tarfile-CHANGES 1970-01-01 00:00:00 +0000
4077+++ tarfile-CHANGES 2011-08-23 18:28:26 +0000
4078@@ -0,0 +1,3 @@
4079+tarfile.py is a copy of python2.7's tarfile.py.
4080+
4081+No changes besides 2.4 compatibility have been made.
4082
4083=== modified file 'tarfile-LICENSE'
4084--- tarfile-LICENSE 2002-10-29 01:49:46 +0000
4085+++ tarfile-LICENSE 2011-08-23 18:28:26 +0000
4086@@ -89,4 +89,4 @@
4087 README Version
4088 --------------
4089
4090-$Id: tarfile-LICENSE,v 1.1 2002/10/29 01:49:46 bescoto Exp $
4091\ No newline at end of file
4092+$Id: tarfile-LICENSE,v 1.1 2002/10/29 01:49:46 bescoto Exp $
4093
4094=== modified file 'testing/diffdirtest.py'
4095--- testing/diffdirtest.py 2010-11-20 15:32:59 +0000
4096+++ testing/diffdirtest.py 2011-08-23 18:28:26 +0000
4097@@ -26,6 +26,7 @@
4098 from duplicity.path import * #@UnusedWildImport
4099 from duplicity import diffdir
4100 from duplicity import selection
4101+from duplicity import util
4102 from duplicity import tarfile #@Reimport
4103
4104 config.setup()
4105@@ -133,8 +134,9 @@
4106 "snapshot/file_to_directory/"]
4107 for tarinfo in tarfile.TarFile("testfiles/output/dir1dir2.difftar",
4108 "r"):
4109- if tarinfo.name in changed_files:
4110- changed_files.remove(tarinfo.name)
4111+ tiname = util.get_tarinfo_name(tarinfo)
4112+ if tiname in changed_files:
4113+ changed_files.remove(tiname)
4114 assert not changed_files, ("Following files not found:\n"
4115 + "\n".join(changed_files))
4116
4117
4118=== modified file 'testing/patchdirtest.py'
4119--- testing/patchdirtest.py 2011-06-17 18:22:28 +0000
4120+++ testing/patchdirtest.py 2011-08-23 18:28:26 +0000
4121@@ -145,25 +145,18 @@
4122
4123 def make_bad_tar(filename):
4124 """Write attack tarfile to filename"""
4125- def iterate_one_pair(path):
4126- """Iterate one (tarinfo, fp) pair
4127-
4128- file object will be empty, and tarinfo will have path
4129- "snapshot/../warning-security-error"
4130-
4131- """
4132- path.index = ("diff", "..", "warning-security-error")
4133- ti = path.get_tarinfo()
4134- fp = cStringIO.StringIO("")
4135- yield (ti, fp)
4136+ tf = tarfile.TarFile(name=filename, mode="w")
4137+
4138+ # file object will be empty, and tarinfo will have path
4139+ # "snapshot/../warning-security-error"
4140 assert not os.system("cat /dev/null >testfiles/output/file")
4141- tf = tarfile.TarFromIterator(iterate_one_pair(
4142- Path("testfiles/output/file")))
4143- tfbuf = tf.read()
4144+ path = Path("testfiles/output/file")
4145+ path.index = ("diff", "..", "warning-security-error")
4146+ ti = path.get_tarinfo()
4147+ fp = cStringIO.StringIO("")
4148+ tf.addfile(ti, fp)
4149
4150- fout = open(filename, "wb")
4151- fout.write(tfbuf)
4152- assert not fout.close()
4153+ tf.close()
4154
4155 self.deltmp()
4156 make_bad_tar("testfiles/output/bad.tar")
4157
4158=== modified file 'testing/test_tarfile.py'
4159--- testing/test_tarfile.py 2010-11-20 15:32:59 +0000
4160+++ testing/test_tarfile.py 2011-08-23 18:28:26 +0000
4161@@ -25,7 +25,7 @@
4162 # $Id: test_tarfile.py,v 1.11 2009/04/02 14:47:12 loafman Exp $
4163
4164 import config
4165-import sys, os, shutil, StringIO, tempfile, unittest, stat, pwd, grp
4166+import sys, os, shutil, StringIO, tempfile, unittest, stat
4167
4168 from duplicity import tarfile
4169
4170@@ -187,52 +187,11 @@
4171 tf.add(filename, filename, 0)
4172 tf.close()
4173
4174- def make_temptar_iterator(self):
4175- """Tar up tempdir using an iterator"""
4176- try:
4177- os.lstat("temp2.tar")
4178- except OSError:
4179- pass
4180- else:
4181- assert not os.system("rm temp2.tar")
4182-
4183- self.make_tempdir()
4184- def generate_pairs(tfi_list):
4185- for filename in self.files_in_tempdir:
4186- ti = tarfile.TarInfo()
4187- ti.set_arcname(filename)
4188- ti.init_from_stat(os.lstat(filename))
4189- if filename == "tempdir/hardlinked2":
4190- ti.type = tarfile.LNKTYPE
4191- ti.linkname = "tempdir/hardlinked1"
4192- yield (ti, None)
4193- elif filename == "tempdir" or filename == "tempdir/fifo":
4194- yield (ti, None)
4195- elif filename == "tempdir/symlink":
4196- ti.linkname = os.readlink(filename)
4197- yield (ti, None)
4198- else:
4199- yield (ti, open(filename, "rb"))
4200- tfi_list = [None]
4201- tfi = tarfile.TarFromIterator(generate_pairs(tfi_list))
4202- tfi_list[0] = tfi # now generate_pairs can find tfi
4203-
4204- buf = tfi.read()
4205- tfi.close()
4206- fout = open("temp2.tar", "wb")
4207- fout.write(buf)
4208- fout.close()
4209-
4210 def test_tarfile_creation(self):
4211 """Create directory, make tarfile, extract using gnutar, compare"""
4212 self.make_temptar()
4213 self.extract_and_compare_tarfile()
4214
4215- def test_tarfile_creation_from_iterator(self):
4216- """Same as test_tarfile_creation, but use iterator interface"""
4217- self.make_temptar_iterator()
4218- self.extract_and_compare_tarfile()
4219-
4220 def extract_and_compare_tarfile(self):
4221 os.system("rm -r tempdir")
4222 assert not os.system("tar -xf temp2.tar")
4223@@ -354,51 +313,13 @@
4224 def seek(self, position):
4225 #print "Seeking to ", position
4226 return self.infp.seek(position)
4227+ def tell(self):
4228+ #print "Telling"
4229+ return self.infp.tell()
4230 def close(self):
4231 #print "Closing"
4232 return self.infp.close()
4233
4234
4235-class PasswordTest(unittest.TestCase):
4236- """Test retrieving, storing password information"""
4237- def compare(self, thunk1, thunk2):
4238- """Make sure thunk1 and thunk2 return the same"""
4239- try: result1 = thunk1()
4240- except KeyError, exc1: keyerror = 1 #@UnusedVariable
4241- else: keyerror = 0
4242-
4243- try: result2 = thunk2()
4244- except KeyError, exc2: #@UnusedVariable
4245- assert keyerror, "Got KeyError vs " + str(result2)
4246- return
4247- else: assert not keyerror, "Got %s vs KeyError" % (str(result1),)
4248-
4249- assert result1 == result2, (result1, result2)
4250-
4251- def test_uid2uname(self):
4252- """Test getting unames by uid"""
4253- for uid in (0, 500, 789, 0, 0, 500):
4254- self.compare(lambda: tarfile.uid2uname(uid),
4255- lambda: pwd.getpwuid(uid)[0])
4256-
4257- def test_gid2gname(self):
4258- """Test getting group names by gid"""
4259- for gid in (0, 500, 789, 0, 0, 500):
4260- self.compare(lambda: tarfile.gid2gname(gid),
4261- lambda: grp.getgrgid(gid)[0])
4262-
4263- def test_gname2gid(self):
4264- """Test getting gids from gnames"""
4265- for gname in ('root', 'ben', 'bin', 'sanothua', 'root', 'root'):
4266- self.compare(lambda: tarfile.gname2gid(gname),
4267- lambda: grp.getgrnam(gname)[2])
4268-
4269- def test_uname2uid(self):
4270- """Test getting uids from unames"""
4271- for uname in ('root', 'ben', 'bin', 'sanothua', 'root', 'root'):
4272- self.compare(lambda: tarfile.uname2uid(uname),
4273- lambda: pwd.getpwnam(uname)[2])
4274-
4275-
4276 if __name__ == "__main__":
4277 unittest.main()

Subscribers

People subscribed via source and target branches

to all changes: