Merge lp:~jderose/filestore/v1-on into lp:filestore

Proposed by Jason Gerard DeRose
Status: Merged
Merged at revision: 318
Proposed branch: lp:~jderose/filestore/v1-on
Merge into: lp:filestore
Diff against target: 1022 lines (+434/-88)
4 files modified
filestore/__init__.py (+147/-31)
filestore/misc.py (+10/-0)
filestore/tests/__init__.py (+257/-57)
filestore/tests/test_misc.py (+20/-0)
To merge this branch: bzr merge lp:~jderose/filestore/v1-on
Reviewers:
  xzcvczx (community): Approve
  dmedia Dev: Pending
Review via email: mp+161334@code.launchpad.net

Description of the change

For details see this bug:

  https://bugs.launchpad.net/filestore/+bug/1174022

Changes:

  * Makes V1 the active protocol and updates all the related unit tests

  * Adds a misc.write_files() helper that writes the test-vector files out into a directory, which is handy for unit-testing things like the Hasher class against a specific protocol version

  * To support both V0 and V1, Hasher.__init__() now takes optional "protocol" and "enc" kwargs (defaulting to `VERSION1` and `db32enc`, respectively); see the first sketch after this list

  * When FileStore.__init__() sees that ".dmedia/files/" contains a V0, Base32 layout, this directory is moved to ".dmedia/files0/" and the correct V1, Dbase32 layout is created in its place

  * And when the above happens, if ".dmedia/store.json" exists, it will be moved to ".dmedia/store0.json" and a new store.json will be written with the doc['_id'] re-encoded to Dbase32

  * Adds a doodle `Migration` class that verifies files in "files0" against their V0 ID while re-hashing them with V1 to calculate their new V1 ID (see the second sketch after this list)
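
  A rough idea of how the new Hasher kwargs and misc.write_files() fit
  together; this is a minimal sketch based on the new unit tests, not code
  from the branch, and "tmpdir" stands for any empty directory:

    from os import path
    from filestore import Hasher, reader_iter, misc, protocols
    from dbase32.rfc3548 import b32enc

    # Write the V1 test-vector files ('A', 'B', 'C', 'CA', 'CB', 'CC'):
    misc.write_files(tmpdir, protocol=protocols.VERSION1)

    h1 = Hasher()  # V1 and db32enc are now the defaults
    for leaf in reader_iter(open(path.join(tmpdir, 'CB'), 'rb')):
        h1.hash_leaf(leaf)
    ch1 = h1.content_hash()  # ch1.id is a 48-character Dbase32 ID

    # For the old protocol, pass the protocol and encoder explicitly:
    h0 = Hasher(protocol=protocols.VERSION0, enc=b32enc)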

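  And a sketch of the resulting V0 => V1 migration flow; the tuple shapes
  match what Migration.__iter__() yields in the diff below, and "parentdir"
  is a placeholder:

    import filestore

    fs = filestore.FileStore(parentdir)  # moves a V0 'files/' to 'files0/'
    if fs.needs_migration:
        for (v0_id, new_id, ch) in filestore.Migration(fs):
            if new_id is None:
                print('corrupt:', v0_id)       # failed V0 verification
            elif ch is None:
                print('already done:', v0_id)  # symlink to its V1 ID
            else:
                print(v0_id, '=>', ch.id)      # verified, moved, re-linked
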
lp:~jderose/filestore/v1-on updated
327. By Jason Gerard DeRose

Small tweak to Migration

xzcvczx (xzcvczx):
review: Approve

Preview Diff

1=== modified file 'filestore/__init__.py'
2--- filestore/__init__.py 2013-04-22 06:23:41 +0000
3+++ filestore/__init__.py 2013-04-28 20:23:24 +0000
4@@ -35,7 +35,7 @@
5 22
6 >>> ch = fs.hash_and_move(tmp_fp)
7 >>> ch.id
8-'MV2DIDJV66B7LCAXIAZRPSMN7I3LZJC6ANTODLJGZOZ3ZGTA'
9+'Y685HWMJEE5J39SLBEP4Y3WY7D8Y9JCIQYAFUFQ39MMV84EW'
10 >>> ch.file_size
11 22
12
13@@ -77,16 +77,17 @@
14 import io
15 import stat
16 from base64 import b64encode
17+import json
18 from threading import Thread
19 from queue import Queue
20 from collections import namedtuple
21 import logging
22
23-from dbase32 import random_id, DB32ALPHABET
24-from dbase32.rfc3548 import b32enc, isb32
25+from dbase32 import db32enc, isdb32, random_id, DB32ALPHABET
26+from dbase32.rfc3548 import isb32, b32enc
27
28-from .protocols import TYPE_ERROR
29-from .protocols import VERSION0 as PROTOCOL
30+from .protocols import TYPE_ERROR, VERSION0
31+from .protocols import VERSION1 as PROTOCOL
32
33 try:
34 from _filestore import fallocate, fastread
35@@ -157,8 +158,8 @@
36
37 For example:
38
39- >>> b32enc(hash_leaf(2, b'XYZ'))
40- 'D7GIW5I5NB6SLJC5ALAX4WU7S7CNYUB3ULMECPY67FFQG4F7'
41+ >>> db32enc(hash_leaf(2, b'XYZ'))
42+ 'YLP5A3SDEEP69SSTFRK7J98AUBS7SW4E75RQN9D4HKRCUXWS'
43
44 :param leaf_index: an ``int`` >= 0
45 :param leaf_data: optional ``bytes`` instance with contents of this leaf
46@@ -173,13 +174,13 @@
47 For example:
48
49 >>> hash_root(31415, b'NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN')
50- 'YFZNR7K6DENL77BAMXIYXJZ4VDRPQEYGKGIZT74M4UPLBBAO'
51+ 'YFXKI7P3BFRMDBBLL4UDVHK3QTHGSXKU4VQ3SPJNKF96TBPV'
52
53 :param file_size: an ``int`` >= 1
54 :param leaf_hashes: a ``bytes`` instance that is the concatenated leaf
55 hashes produced by `hash_leaf()`
56 """
57- return b32enc(PROTOCOL.hash_root(file_size, leaf_hashes))
58+ return db32enc(PROTOCOL.hash_root(file_size, leaf_hashes))
59
60
61 class Hasher:
62@@ -187,9 +188,11 @@
63 A helper to keep track of state as you hash leaf after leaf.
64 """
65
66- __slots__ = ('file_size', 'leaf_index', 'array', 'closed')
67+ __slots__ = ('protocol', 'enc', 'file_size', 'leaf_index', 'array', 'closed')
68
69- def __init__(self):
70+ def __init__(self, protocol=PROTOCOL, enc=db32enc):
71+ self.protocol = protocol
72+ self.enc = enc
73 self.file_size = 0
74 self.leaf_index = 0
75 self.array = bytearray()
76@@ -206,7 +209,7 @@
77 )
78 if len(leaf.data) < LEAF_SIZE:
79 self.closed = True
80- leaf_hash = PROTOCOL.hash_leaf(leaf.index, leaf.data)
81+ leaf_hash = self.protocol.hash_leaf(leaf.index, leaf.data)
82 self.array.extend(leaf_hash)
83 self.file_size += len(leaf.data)
84 self.leaf_index += 1
85@@ -216,7 +219,7 @@
86 self.closed = True
87 leaf_hashes = bytes(self.array)
88 return ContentHash(
89- b32enc(PROTOCOL.hash_root(self.file_size, leaf_hashes)),
90+ self.enc(self.protocol.hash_root(self.file_size, leaf_hashes)),
91 self.file_size,
92 leaf_hashes
93 )
94@@ -319,26 +322,33 @@
95
96 def check_id(_id):
97 """
98- Verify that *_id* is a valid base32-encoded ID of the correct length.
99+ Verify that *_id* is a valid Dbase32 encoded ID of the correct length.
100
101 A malicious *_id* could cause path traversal or other security gotchas,
102 thus this sanity check. When *_id* is valid, it is returned unchanged:
103
104- >>> check_id('OMPLTTYVTIJINDZWIS2PBZ4THWA6CTGCGT27RFIDKV7FSTCA')
105- 'OMPLTTYVTIJINDZWIS2PBZ4THWA6CTGCGT27RFIDKV7FSTCA'
106-
107- However, when *_id* does not conform, an `IDError` is raised.
108- raised:
109-
110- >>> check_id('NWBNVXVK5DQGIOW7MYR4K3KA')
111- Traceback (most recent call last):
112- ...
113- filestore.IDError: invalid file ID: 'NWBNVXVK5DQGIOW7MYR4K3KA'
114+ >>> check_id('39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY')
115+ '39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY39AY'
116+
117+ However, if *_id* is the incorrect length, an `IDError` is raised:
118+
119+ >>> check_id('39AY39AY39AY39AY39AY39AY')
120+ Traceback (most recent call last):
121+ ...
122+ filestore.IDError: invalid file ID: '39AY39AY39AY39AY39AY39AY'
123+
124+ Likewise when *_id* is the correct length but contains symbols not included
125+ in the Dbase32 alphabet:
126+
127+ >>> check_id('29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ')
128+ Traceback (most recent call last):
129+ ...
130+ filestore.IDError: invalid file ID: '29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ29AZ'
131
132 """
133 if not isinstance(_id, str):
134 raise TypeError(TYPE_ERROR.format('_id', str, type(_id), _id))
135- if not (len(_id) == DIGEST_B32LEN and isb32(_id)):
136+ if not (len(_id) == DIGEST_B32LEN and isdb32(_id)):
137 raise IDError(_id)
138 return _id
139
140@@ -381,14 +391,14 @@
141 >>> check_root_hash(_id, 21, leaf_hash) # 21 instead of 22 bytes
142 Traceback (most recent call last):
143 ...
144- filestore.RootHashError: 'MV2DIDJV66B7LCAXIAZRPSMN7I3LZJC6ANTODLJGZOZ3ZGTA'
145+ filestore.RootHashError: 'Y685HWMJEE5J39SLBEP4Y3WY7D8Y9JCIQYAFUFQ39MMV84EW'
146
147 If the claimed *file_size* and *leaf_hashes* are correct, the 3 values are
148 returned in a `ContentHash` named tuple:
149
150 >>> ch = check_root_hash(_id, 22, leaf_hash)
151 >>> ch.id
152- 'MV2DIDJV66B7LCAXIAZRPSMN7I3LZJC6ANTODLJGZOZ3ZGTA'
153+ 'Y685HWMJEE5J39SLBEP4Y3WY7D8Y9JCIQYAFUFQ39MMV84EW'
154 >>> ch.file_size
155 22
156
157@@ -820,11 +830,103 @@
158 return StatVFS(size, used, avail, readonly, st.f_frsize)
159
160
161+def is_v0_files(files):
162+ for name in NAMES_DIFF:
163+ if path.isdir(path.join(files, name)):
164+ return True
165+ return False
166+
167+
168+def dumps(obj):
169+ return json.dumps(obj,
170+ ensure_ascii=False,
171+ sort_keys=True,
172+ separators=(',',': '),
173+ indent=4,
174+ )
175+
176+
177+def migrate_store_doc(basedir):
178+ store = path.join(basedir, 'store.json')
179+ store0 = path.join(basedir, 'store0.json')
180+ try:
181+ doc = json.load(open(store, 'r'))
182+ except FileNotFoundError:
183+ log.error("'store.json' does not exist in %r", basedir)
184+ return False
185+
186+ if path.exists(store0):
187+ raise Exception("'store0.json' already exists in {!r}".format(basedir))
188+
189+ log.warning("Moving V0 'store.json' to 'store0.json' in %r", basedir)
190+ os.rename(store, store0)
191+
192+ from dbase32.rfc3548 import b32dec
193+ assert doc.get('migrated') is None
194+ old_id = doc['_id']
195+ new_id = db32enc(b32dec(old_id))
196+ log.warning('Migrating FileStore ID from %r to %r in %r',
197+ old_id, new_id, basedir)
198+ doc['_id'] = new_id
199+ doc['migrated'] = True
200+ text = dumps(doc)
201+ tmp = path.join(basedir, 'store.json.' + random_id())
202+ fp = open(tmp, 'x')
203+ fp.write(text)
204+ fp.flush()
205+ os.fsync(fp.fileno())
206+ os.chmod(fp.fileno(), 0o444)
207+ fp.close()
208+ os.rename(tmp, store)
209+ return True
210+
211+
212+###################################################
213+# The `Migration` class use for V0 => V1 migration:
214+
215+class Migration:
216+ def __init__(self, fs):
217+ assert isinstance(fs, FileStore)
218+ self.fs = fs
219+ self.files0 = fs.join('files0')
220+ assert path.isdir(self.files0)
221+
222+ def __iter__(self):
223+ for prefix in B32NAMES:
224+ subdir = path.join(self.files0, prefix)
225+ for name in sorted(os.listdir(subdir)):
226+ src = path.join(subdir, name)
227+ v0_id = prefix + name
228+ assert path.isfile(src) or path.islink(src)
229+ assert isb32(v0_id) and len(v0_id) == 48
230+
231+ if path.islink(src):
232+ log.info('Reading symlink %r', src)
233+ yield (v0_id, os.readlink(src), None)
234+ else:
235+ src_fp = open(src, 'rb')
236+ h0 = Hasher(protocol=VERSION0, enc=b32enc)
237+ h1 = Hasher()
238+ for leaf in reader_iter(src_fp):
239+ h0.hash_leaf(leaf)
240+ h1.hash_leaf(leaf)
241+ ch0 = h0.content_hash()
242+ ch1 = h1.content_hash()
243+ assert isdb32(ch1.id)
244+ if ch0.id != v0_id:
245+ yield (v0_id, None, None)
246+ else:
247+ dst = self.fs.path(ch1.id)
248+ log.info('Moving %r to %r', src, dst)
249+ os.rename(src, dst)
250+ os.symlink(ch1.id, src)
251+ yield (v0_id, ch1.id, ch1)
252+
253
254 ########################
255 # The `FileStore` class:
256
257-class FileStore(object):
258+class FileStore:
259 """
260 Arranges files in a special layout according to their content-hash.
261
262@@ -868,10 +970,24 @@
263 self.__class__.__name__, self.parentdir)
264 )
265
266+ files = self.join('files')
267+ files0 = self.join('files0')
268+
269 # If basedir doesn't exist, create it and initialize all dirs in layout:
270 if ensuredir(self.basedir):
271 log.info('Initalizing FileStore in %r', self.basedir)
272 self.init_dirs()
273+ elif is_v0_files(files):
274+ if path.exists(files0):
275+ raise Exception(
276+ "'files' is V0 layout but 'files0' exists in {!r}".format(self.basedir)
277+ )
278+ log.warning("Moving V0 'files' to 'files0' in %r", self.basedir)
279+ os.rename(files, files0)
280+ self.init_dirs()
281+ migrate_store_doc(self.basedir)
282+
283+ self.needs_migration = path.isdir(files0)
284
285 def __repr__(self):
286 return '{}({!r})'.format(self.__class__.__name__, self.parentdir)
287@@ -894,11 +1010,11 @@
288 * the entry is a symlink (even if to a valid file)
289 * the file is zero bytes in size
290 """
291- for prefix in B32NAMES:
292+ for prefix in DB32NAMES:
293 subdir = path.join(self.basedir, 'files', prefix)
294 for name in sorted(os.listdir(subdir)):
295 _id = prefix + name
296- if len(_id) != DIGEST_B32LEN or not isb32(_id):
297+ if len(_id) != DIGEST_B32LEN or not isdb32(_id):
298 continue
299 fullname = path.join(subdir, name)
300 st = os.lstat(fullname)
301@@ -917,7 +1033,7 @@
302 d = path.join(self.basedir, name)
303 ensuredir(d)
304 os.chmod(d, 0o777)
305- for name in B32NAMES:
306+ for name in DB32NAMES:
307 d = path.join(self.basedir, 'files', name)
308 ensuredir(d)
309 os.chmod(d, 0o777)
310
311=== modified file 'filestore/misc.py'
312--- filestore/misc.py 2013-02-28 23:50:17 +0000
313+++ filestore/misc.py 2013-04-28 20:23:24 +0000
314@@ -65,6 +65,16 @@
315 }
316
317
318+def write_files(tmpdir, protocol=VERSION1):
319+ leaves = build_leaves(protocol.leaf_size)
320+ for (key, data) in leaves.items():
321+ open(path.join(tmpdir, key), 'xb').write(data)
322+ fp = open(path.join(tmpdir, 'C' + key), 'xb')
323+ fp.write(leaves['C'])
324+ fp.write(data)
325+ fp.close()
326+
327+
328 def build_vectors(protocol, encoder=db32enc):
329 leaves = build_leaves(protocol.leaf_size)
330
331
332=== modified file 'filestore/tests/__init__.py'
333--- filestore/tests/__init__.py 2013-02-21 15:43:24 +0000
334+++ filestore/tests/__init__.py 2013-04-28 20:23:24 +0000
335@@ -28,15 +28,15 @@
336 from os import path
337 import io
338 import stat
339-from base64 import b32encode, b32decode
340 from subprocess import check_call
341 import tempfile
342 import shutil
343+import json
344 from random import SystemRandom
345
346-from skein import skein512
347-from dbase32.rfc3548 import b32enc, b32dec
348-from dbase32 import isdb32
349+from _skein import skein512
350+from dbase32 import isdb32, db32enc, random_id
351+from dbase32.rfc3548 import b32enc
352
353 from filestore import protocols, misc
354 import filestore
355@@ -82,8 +82,8 @@
356 return str(n).encode('utf-8')
357
358
359-def random_id(id_bytes=filestore.DIGEST_BYTES):
360- return b32encode(os.urandom(id_bytes)).decode('ascii')
361+def random_file_id(numbytes=filestore.DIGEST_BYTES):
362+ return random_id(numbytes)
363
364
365 class TempDir(object):
366@@ -99,6 +99,11 @@
367 shutil.rmtree(self.dir)
368 self.dir = None
369
370+ def mkdir(self, *parts):
371+ d = self.join(*parts)
372+ os.mkdir(d)
373+ return d
374+
375 def makedirs(self, *parts):
376 d = self.join(*parts)
377 if not path.exists(d):
378@@ -410,23 +415,15 @@
379
380 # Test with good values:
381 content = b'N'
382- digest = f(2, content)
383 self.assertEqual(
384- digest,
385- skein512(protocols.VERSION0._hash_leaf_index(b'2') + content,
386- digest_bits=240,
387- pers=protocols.PERS_LEAF,
388- ).digest()
389+ filestore.hash_leaf(2, content),
390+ protocols.VERSION1.hash_leaf(2, content)
391 )
392
393 content = b'N' * filestore.LEAF_SIZE
394- digest = f(2, content)
395 self.assertEqual(
396- digest,
397- skein512(protocols.VERSION0._hash_leaf_index(b'2') + content,
398- digest_bits=240,
399- pers=protocols.PERS_LEAF,
400- ).digest()
401+ filestore.hash_leaf(2, content),
402+ protocols.VERSION1.hash_leaf(2, content)
403 )
404
405 # A 25k value sanity check on our crytographic claim that the
406@@ -524,11 +521,11 @@
407 leaf_hashes = b'D' * filestore.DIGEST_BYTES
408 self.assertEqual(
409 filestore.hash_root(1, leaf_hashes),
410- '4AZOU4R7M6JKJJRQMVX42YB7ULRUCS6FZGJNZCDVOATXYPML'
411+ '39QLJTDIFYBSMR8A9IHAIGWMDCOX3TLWVKIAY9KSHGDGHCEL'
412 )
413 self.assertEqual(
414 filestore.hash_root(filestore.LEAF_SIZE, leaf_hashes),
415- 'RXBEFCKXWKYNPXZOR234QHYI475L2AF7C4AOQUG7EG7UJBDJ'
416+ 'JL57GWEV6OC4DGGE5UV4YRJ3J3ARVU8GDPO6TNEJAK9ULNJW'
417 )
418
419 # A 25k value sanity check on our crytographic claim that the
420@@ -608,7 +605,7 @@
421 self.assertEqual(cm.exception.file_size, FILE_SIZE + 1)
422 self.assertEqual(cm.exception.leaf_hashes, LEAF_HASHES)
423 self.assertEqual(cm.exception.bad_id,
424- 'U5FK5XRT33ZJTCYIO3WJB7YTTGESXCBKZEW35J7FIQI7UN7S'
425+ 'K7BCQ9MWWKIEPQ9FNSWUATRW8UC845H7HDESWTRIK8NPMIRT'
426 )
427
428 with self.assertRaises(filestore.RootHashError) as cm:
429@@ -617,7 +614,7 @@
430 self.assertEqual(cm.exception.file_size, FILE_SIZE - 1)
431 self.assertEqual(cm.exception.leaf_hashes, LEAF_HASHES)
432 self.assertEqual(cm.exception.bad_id,
433- 'OYESBWEZ4Y2AGSLMNZB4ZF75A2VG7NXVB4R25SSMRGXLN4CR'
434+ 'EYYA6GQD64CR94H3OQEVX88S7R83CWBY8ECM4KRNCN4G7DC4'
435 )
436
437 def test_enumerate_leaf_hashes(self):
438@@ -738,7 +735,7 @@
439
440 def test_check_id(self):
441 # Test with wrong type
442- bad = random_id(30).encode('utf-8')
443+ bad = random_file_id(30).encode('utf-8')
444 with self.assertRaises(TypeError) as cm:
445 filestore.check_id(bad)
446 self.assertEqual(
447@@ -757,7 +754,7 @@
448 self.assertIs(cm.exception.id, value)
449
450 # Test with 48 and 56 character:
451- id48 = random_id(30)
452+ id48 = random_file_id(30)
453 self.assertIs(filestore.check_id(id48), id48)
454
455 # Test case sensitivity:
456@@ -1200,7 +1197,7 @@
457 ch = filestore.hash_fp(src_fp)
458 self.assertIsInstance(ch, filestore.ContentHash)
459 self.assertEqual(ch.id,
460- 'DMJGE4OTWZVKSX426GHE46GZCGICMQOVWNGRVB7E665Y2RAM'
461+ 'CLSFQE444IGFX49Y9M6R9PLTF97HG7OQXAIEYYP54HNBN559'
462 )
463 self.assertEqual(ch.file_size, 20971520)
464 self.assertFalse(path.exists(dst))
465@@ -1210,7 +1207,7 @@
466 ch = filestore.hash_fp(src_fp, dst_fp)
467 self.assertIsInstance(ch, filestore.ContentHash)
468 self.assertEqual(ch.id,
469- 'DMJGE4OTWZVKSX426GHE46GZCGICMQOVWNGRVB7E665Y2RAM'
470+ 'CLSFQE444IGFX49Y9M6R9PLTF97HG7OQXAIEYYP54HNBN559'
471 )
472 self.assertEqual(ch.file_size, 20971520)
473 self.assertTrue(path.isfile(dst))
474@@ -1224,7 +1221,7 @@
475 tmp.write(data, L)
476 tmp.write(C + data, 'C' + L)
477
478- vectors = misc.load_data('V0')
479+ vectors = misc.load_data('V1')
480 for name in ['A', 'B', 'C', 'CA', 'CB', 'CC']:
481 src_fp = open(tmp.join(name), 'rb')
482 ch = filestore.hash_fp(src_fp)
483@@ -1290,10 +1287,105 @@
484 filestore.StatVFS(size, size - free, avail, False, s1.f_frsize)
485 )
486
487+ def test_is_v0_files(self):
488+ v0 = TempDir()
489+ for name in filestore.B32NAMES:
490+ v0.mkdir(name)
491+ self.assertIs(filestore.is_v0_files(v0.dir), True)
492+ v1 = TempDir()
493+ for name in filestore.DB32NAMES:
494+ v1.mkdir(name)
495+ self.assertIs(filestore.is_v0_files(v1.dir), False)
496+ tmp = TempDir()
497+ fs = filestore.FileStore(tmp.dir)
498+ files = tmp.join('.dmedia', 'files')
499+ self.assertTrue(path.isdir(files))
500+ self.assertIs(filestore.is_v0_files(files), False)
501+
502+ def test_migrate_store_doc(self):
503+ tmp = TempDir()
504+ store = tmp.join('store.json')
505+ store0 = tmp.join('store0.json')
506+ self.assertIs(filestore.migrate_store_doc(tmp.dir), False)
507+
508+ # A V0 doc:
509+ doc = {
510+ '_id': 'DLA4NDZRW2LXEPF3RV7YHMON',
511+ 'copies': 1,
512+ 'plugin': 'filestore',
513+ 'time': 1320063400.353743,
514+ 'type': 'dmedia/store',
515+ }
516+ json.dump(doc, open(store, 'x'))
517+ self.assertIs(filestore.migrate_store_doc(tmp.dir), True)
518+ self.assertEqual(json.load(open(store, 'r')),
519+ {
520+ '_id': '6E3VG6SKPTEQ7I8UKOYRAFHG',
521+ 'copies': 1,
522+ 'plugin': 'filestore',
523+ 'time': 1320063400.353743,
524+ 'type': 'dmedia/store',
525+ 'migrated': True,
526+ }
527+ )
528+ self.assertEqual(stat.S_IMODE(os.stat(store).st_mode), 0o444)
529+ self.assertEqual(json.load(open(store0, 'r')), doc)
530+ self.assertEqual(
531+ set(os.listdir(tmp.dir)),
532+ {'store.json', 'store0.json'}
533+ )
534+ with self.assertRaises(Exception) as cm:
535+ filestore.migrate_store_doc(tmp.dir)
536+ self.assertEqual(
537+ str(cm.exception),
538+ "'store0.json' already exists in {!r}".format(tmp.dir)
539+ )
540+
541+ # Try with some random ID values:
542+ for i in range(25):
543+ tmp = TempDir()
544+ store = tmp.join('store.json')
545+ store0 = tmp.join('store0.json')
546+ data = os.urandom(15)
547+ b32_id = b32enc(data)
548+ db32_id = db32enc(data)
549+ self.assertNotEqual(b32_id, db32_id)
550+ json.dump({'_id': b32_id}, open(store, 'x'))
551+ self.assertIs(filestore.migrate_store_doc(tmp.dir), True)
552+ self.assertEqual(json.load(open(store, 'r')),
553+ {
554+ '_id': db32_id,
555+ 'migrated': True,
556+ }
557+ )
558+ self.assertEqual(stat.S_IMODE(os.stat(store).st_mode), 0o444)
559+ self.assertEqual(json.load(open(store0, 'r')), {'_id': b32_id})
560+ self.assertEqual(
561+ set(os.listdir(tmp.dir)),
562+ {'store.json', 'store0.json'}
563+ )
564+ with self.assertRaises(Exception) as cm:
565+ filestore.migrate_store_doc(tmp.dir)
566+ self.assertEqual(
567+ str(cm.exception),
568+ "'store0.json' already exists in {!r}".format(tmp.dir)
569+ )
570+
571
572 class TestHasher(TestCase):
573 def test_init(self):
574 h = filestore.Hasher()
575+ self.assertIs(h.protocol, protocols.VERSION1)
576+ self.assertIs(h.enc, db32enc)
577+ self.assertEqual(h.file_size, 0)
578+ self.assertEqual(h.leaf_index, 0)
579+ self.assertIsInstance(h.array, bytearray)
580+ self.assertEqual(h.array, b'')
581+ self.assertFalse(h.closed)
582+
583+ h = filestore.Hasher(protocols.VERSION0, b32enc)
584+ self.assertIs(h.protocol, protocols.VERSION0)
585+ self.assertIs(h.enc, b32enc)
586 self.assertEqual(h.file_size, 0)
587 self.assertEqual(h.leaf_index, 0)
588 self.assertIsInstance(h.array, bytearray)
589@@ -1334,7 +1426,7 @@
590 str(cm.exception),
591 'Expected leaf.index 1, got 0'
592 )
593-
594+
595 # Test when it's all good
596 h = filestore.Hasher()
597
598@@ -1370,6 +1462,28 @@
599 h.content_hash()
600 self.assertTrue(h.closed)
601
602+ def test_V1(self):
603+ tmp = TempDir()
604+ root_hashes = misc.load_data('V1')['root_hashes']
605+ misc.write_files(tmp.dir, protocols.VERSION1)
606+ for (name, _id) in root_hashes.items():
607+ fp = open(tmp.join(name), 'rb')
608+ h = filestore.Hasher()
609+ for leaf in filestore.reader_iter(fp):
610+ h.hash_leaf(leaf)
611+ self.assertEqual(h.content_hash().id, _id)
612+
613+ def test_V0(self):
614+ tmp = TempDir()
615+ root_hashes = misc.load_data('V0')['root_hashes']
616+ misc.write_files(tmp.dir, protocols.VERSION0)
617+ for (name, _id) in root_hashes.items():
618+ fp = open(tmp.join(name), 'rb')
619+ h = filestore.Hasher(protocols.VERSION0, b32enc)
620+ for leaf in filestore.reader_iter(fp):
621+ h.hash_leaf(leaf)
622+ self.assertEqual(h.content_hash().id, _id)
623+
624
625 class TestFileStore(TestCase):
626 def test_init(self):
627@@ -1438,12 +1552,14 @@
628 self.assertTrue(path.isdir(fs.tmp))
629 self.assertIsNone(fs.id)
630 self.assertEqual(fs.copies, 0)
631+ self.assertFalse(fs.needs_migration)
632
633 # Test when _id and copies are supplied
634 tmp = TempDir()
635 fs = filestore.FileStore(tmp.dir, 'foo', 1)
636 self.assertEqual(fs.id, 'foo')
637 self.assertEqual(fs.copies, 1)
638+ self.assertFalse(fs.needs_migration)
639
640 # Test when basedir exists and is a directory
641 tmp = TempDir()
642@@ -1454,6 +1570,7 @@
643 self.assertTrue(path.isdir(basedir))
644 self.assertEqual(fs.tmp, path.join(basedir, 'tmp'))
645 self.assertFalse(path.isdir(fs.tmp))
646+ self.assertFalse(fs.needs_migration)
647
648 # Test when basedir exists and is a file
649 tmp = TempDir()
650@@ -1477,6 +1594,89 @@
651 '{!r} is symlink to {!r}'.format(basedir, d)
652 )
653
654+ # Test when .dmedia/files/ contains a V0, Base32 layout:
655+ tmp = TempDir()
656+ files = tmp.join('.dmedia', 'files')
657+ files0 = tmp.join('.dmedia', 'files0')
658+ fs = filestore.FileStore(tmp.dir)
659+ shutil.rmtree(files)
660+ os.mkdir(files)
661+ for name in filestore.B32NAMES:
662+ os.mkdir(path.join(files, name))
663+ fs = filestore.FileStore(tmp.dir)
664+ self.assertTrue(fs.needs_migration)
665+ self.assertTrue(path.isdir(files))
666+ for name in filestore.DB32NAMES:
667+ self.assertTrue(path.isdir(path.join(files, name)))
668+ self.assertEqual(sorted(os.listdir(files)), list(filestore.DB32NAMES))
669+ self.assertTrue(path.isdir(files0))
670+ for name in filestore.B32NAMES:
671+ self.assertTrue(path.isdir(path.join(files0, name)))
672+ self.assertEqual(sorted(os.listdir(files0)), list(filestore.B32NAMES))
673+
674+ # Test that no futher change is done:
675+ fs = filestore.FileStore(tmp.dir)
676+ self.assertTrue(fs.needs_migration)
677+ self.assertTrue(path.isdir(files))
678+ for name in filestore.DB32NAMES:
679+ self.assertTrue(path.isdir(path.join(files, name)))
680+ self.assertEqual(sorted(os.listdir(files)), list(filestore.DB32NAMES))
681+ self.assertTrue(path.isdir(files0))
682+ for name in filestore.B32NAMES:
683+ self.assertTrue(path.isdir(path.join(files0, name)))
684+ self.assertEqual(sorted(os.listdir(files0)), list(filestore.B32NAMES))
685+
686+ # Test when files contains V0/Base32 layout but files0 already exists:
687+ shutil.rmtree(files)
688+ os.mkdir(files)
689+ for name in filestore.B32NAMES:
690+ os.mkdir(path.join(files, name))
691+ with self.assertRaises(Exception) as cm:
692+ fs = filestore.FileStore(tmp.dir)
693+ self.assertEqual(
694+ str(cm.exception),
695+ "'files' is V0 layout but 'files0' exists in {!r}".format(tmp.join('.dmedia'))
696+ )
697+
698+ # Test that store.json gets properly migrated:
699+ tmp = TempDir()
700+ files = tmp.join('.dmedia', 'files')
701+ files0 = tmp.join('.dmedia', 'files0')
702+ store = tmp.join('.dmedia', 'store.json')
703+ store0 = tmp.join('.dmedia', 'store0.json')
704+
705+ # Setup:
706+ fs = filestore.FileStore(tmp.dir)
707+ shutil.rmtree(files)
708+ os.mkdir(files)
709+ for name in filestore.B32NAMES:
710+ os.mkdir(path.join(files, name))
711+ data = os.urandom(15)
712+ b32_id = b32enc(data)
713+ db32_id = db32enc(data)
714+ self.assertNotEqual(b32_id, db32_id)
715+ json.dump({'_id': b32_id}, open(store, 'x'))
716+
717+ # And test:
718+ fs = filestore.FileStore(tmp.dir)
719+ self.assertTrue(fs.needs_migration)
720+ self.assertTrue(path.isdir(files))
721+ for name in filestore.DB32NAMES:
722+ self.assertTrue(path.isdir(path.join(files, name)))
723+ self.assertEqual(sorted(os.listdir(files)), list(filestore.DB32NAMES))
724+ self.assertTrue(path.isdir(files0))
725+ for name in filestore.B32NAMES:
726+ self.assertTrue(path.isdir(path.join(files0, name)))
727+ self.assertEqual(sorted(os.listdir(files0)), list(filestore.B32NAMES))
728+ self.assertEqual(json.load(open(store, 'r')),
729+ {
730+ '_id': db32_id,
731+ 'migrated': True,
732+ }
733+ )
734+ self.assertEqual(stat.S_IMODE(os.stat(store).st_mode), 0o444)
735+ self.assertEqual(json.load(open(store0, 'r')), {'_id': b32_id})
736+
737 def test_repr(self):
738 tmp = TempDir()
739 fs = filestore.FileStore(tmp.dir)
740@@ -1487,7 +1687,7 @@
741 fs = filestore.FileStore(tmp.dir)
742
743 # Should ignore files with wrong ID length:
744- short = tuple(random_id(25) for i in range(50))
745+ short = tuple(random_file_id(25) for i in range(50))
746 for _id in short:
747 f = fs.join('files', _id[:2], _id[2:])
748 assert not path.exists(f)
749@@ -1495,7 +1695,7 @@
750 os.chmod(f, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
751 self.assertEqual(path.getsize(f), 7)
752 self.assertEqual(list(fs), [])
753- long = tuple(random_id(40) for i in range(50))
754+ long = tuple(random_file_id(40) for i in range(50))
755 for _id in long:
756 f = fs.join('files', _id[:2], _id[2:])
757 assert not path.exists(f)
758@@ -1506,7 +1706,7 @@
759
760 # Should ignore files invalid b32 letters:
761 for i in range(50):
762- _id = random_id(25) + '1ABCDEFG' # 1 is not in B32ALPHABET
763+ _id = random_file_id(25) + '1ABCDEFG' # 1 is not in B32ALPHABET
764 assert len(_id) == filestore.DIGEST_B32LEN
765 f = fs.join('files', _id[:2], _id[2:])
766 assert not path.exists(f)
767@@ -1517,7 +1717,7 @@
768
769 # Should ignore empty files:
770 for i in range(50):
771- _id = random_id()
772+ _id = random_file_id()
773 f = fs.join('files', _id[:2], _id[2:])
774 assert not path.exists(f)
775 open(f, 'wb').close()
776@@ -1528,7 +1728,7 @@
777
778 # Should ignore directories
779 for i in range(50):
780- _id = random_id()
781+ _id = random_file_id()
782 d = fs.join('files', _id[:2], _id[2:])
783 assert not path.exists(d)
784 os.mkdir(d)
785@@ -1539,7 +1739,7 @@
786 # Now add valid files in (48 character IDs)
787 stats = []
788 for i in range(2000):
789- _id = random_id(30)
790+ _id = random_file_id(30)
791 size = i + 1
792 f = fs.path(_id)
793 assert not path.exists(f)
794@@ -1556,7 +1756,7 @@
795 # Should ignore symlinks, even if to valid files
796 # This makes sure os.lstat() is being used rather than os.stat()
797 for i in range(50):
798- _id = random_id()
799+ _id = random_file_id()
800 link = fs.path(_id)
801 assert not path.exists(link)
802 file = fs.path(stats[i].id)
803@@ -1578,7 +1778,7 @@
804
805 self.assertEqual(
806 sorted(os.listdir(path.join(basedir, 'files'))),
807- list(B32NAMES)
808+ list(filestore.DB32NAMES)
809 )
810 for name in ['corrupt', 'partial', 'tmp']:
811 d = path.join(basedir, name)
812@@ -1588,7 +1788,7 @@
813 d = path.join(basedir, name)
814 self.assertTrue(path.isdir(d))
815 self.assertFalse(path.islink(d))
816- for name in B32NAMES:
817+ for name in filestore.DB32NAMES:
818 d = path.join(basedir, 'files', name)
819 self.assertTrue(path.isdir(d))
820 self.assertFalse(path.islink(d))
821@@ -1612,7 +1812,7 @@
822
823 # Test when some subdirs exist:
824 os.rmdir(path.join(basedir, 'tmp'))
825- for (i, name) in enumerate(B32NAMES):
826+ for (i, name) in enumerate(filestore.DB32NAMES):
827 if i % 3 == 0:
828 d = path.join(basedir, 'files', name)
829 self.assertIsNone(fs.init_dirs())
830@@ -1702,7 +1902,7 @@
831 parentdir = tmp.makedirs('foo')
832 fs = filestore.FileStore(parentdir)
833
834- _id = random_id()
835+ _id = random_file_id()
836 self.assertEqual(
837 fs.path(_id),
838 tmp.join('foo', '.dmedia', 'files', _id[:2], _id[2:])
839@@ -1719,7 +1919,7 @@
840 parentdir = tmp.makedirs('foo')
841 fs = filestore.FileStore(parentdir)
842
843- _id = random_id()
844+ _id = random_file_id()
845 self.assertEqual(
846 fs.partial_path(_id),
847 tmp.join('foo', '.dmedia', 'partial', _id)
848@@ -1736,7 +1936,7 @@
849 parentdir = tmp.makedirs('foo')
850 fs = filestore.FileStore(parentdir)
851
852- _id = random_id()
853+ _id = random_file_id()
854 self.assertEqual(
855 fs.corrupt_path(_id),
856 tmp.join('foo', '.dmedia', 'corrupt', _id)
857@@ -1762,7 +1962,7 @@
858 tmp = TempDir()
859 fs = filestore.FileStore(tmp.dir)
860
861- id1 = random_id()
862+ id1 = random_file_id()
863
864 # File doesn't exist
865 self.assertFalse(fs.exists(id1))
866@@ -1772,13 +1972,13 @@
867 self.assertTrue(fs.exists(id1))
868
869 # Not file:
870- id2 = random_id()
871+ id2 = random_file_id()
872 tmp.makedirs('.dmedia', 'files', id2[:2], id2[2:])
873 self.assertTrue(path.isdir(fs.path(id2)))
874 self.assertFalse(fs.exists(id2))
875
876 # Empty file
877- id3 = random_id()
878+ id3 = random_file_id()
879 f = fs.path(id3)
880 assert not path.exists(f)
881 open(f, 'wb').close()
882@@ -1789,7 +1989,7 @@
883 # File doesn't exist
884 tmp = TempDir()
885 fs = filestore.FileStore(tmp.dir)
886- _id = random_id()
887+ _id = random_file_id()
888 with self.assertRaises(filestore.FileNotFound) as cm:
889 st = fs.stat(_id)
890 self.assertEqual(cm.exception.id, _id)
891@@ -1802,9 +2002,9 @@
892 # File is a symlink:
893 tmp = TempDir()
894 fs = filestore.FileStore(tmp.dir)
895- file = random_id()
896- link = random_id()
897- open(fs.path(file), 'wb').write(b'Novacut')
898+ file = random_file_id()
899+ link = random_file_id()
900+ open(fs.path(file), 'xb').write(b'Novacut')
901 os.symlink(fs.path(file), fs.path(link))
902 assert path.isfile(fs.path(link))
903 assert path.islink(fs.path(link))
904@@ -1820,7 +2020,7 @@
905 # File is a directory
906 tmp = TempDir()
907 fs = filestore.FileStore(tmp.dir)
908- _id = random_id()
909+ _id = random_file_id()
910 os.mkdir(fs.path(_id))
911 assert path.isdir(fs.path(_id))
912 with self.assertRaises(filestore.FileNotFound) as cm:
913@@ -1835,7 +2035,7 @@
914 # Empty file
915 tmp = TempDir()
916 fs = filestore.FileStore(tmp.dir)
917- _id = random_id()
918+ _id = random_file_id()
919 open(fs.path(_id), 'wb').close()
920 assert path.isfile(fs.path(_id))
921 assert not path.islink(fs.path(_id))
922@@ -1852,7 +2052,7 @@
923 # Valid file
924 tmp = TempDir()
925 fs = filestore.FileStore(tmp.dir)
926- _id = random_id()
927+ _id = random_file_id()
928 open(fs.path(_id), 'wb').write(b'Novacut')
929 st = fs.stat(_id)
930 self.assertIsInstance(st, filestore.Stat)
931@@ -1865,7 +2065,7 @@
932 tmp = TempDir()
933 fs = filestore.FileStore(tmp.dir)
934
935- _id = random_id()
936+ _id = random_file_id()
937
938 # File doesn't exist
939 with self.assertRaises(filestore.FileNotFound) as cm:
940@@ -2123,7 +2323,7 @@
941 def test_remove(self):
942 tmp = TempDir()
943 fs = filestore.FileStore(tmp.dir)
944- _id = random_id()
945+ _id = random_file_id()
946 canonical = fs.path(_id)
947
948 # File doesn't exist
949@@ -2259,7 +2459,7 @@
950 def test_allocate_partial(self):
951 tmp = TempDir()
952 fs = filestore.FileStore(tmp.dir)
953- _id = random_id()
954+ _id = random_file_id()
955 filename = tmp.join('.dmedia', 'partial', _id)
956
957 # Test when file dosen't yet exist
958@@ -2294,7 +2494,7 @@
959 self.assertEqual(os.fstat(fp.fileno()).st_size, 2311)
960 self.assertEqual(stat.S_IMODE(os.fstat(fp.fileno()).st_mode), 0o666)
961
962- _id = random_id() # We'll use a new ID for below
963+ _id = random_file_id() # We'll use a new ID for below
964 filename = tmp.join('.dmedia', 'partial', _id)
965
966 # Test with bad size type:
967@@ -2331,7 +2531,7 @@
968 def test_move_to_canonical(self):
969 tmp = TempDir()
970 fs = filestore.FileStore(tmp.dir)
971- _id = random_id()
972+ _id = random_file_id()
973 dst = fs.path(_id)
974
975 # Test with wrong src_fp type
976@@ -2402,7 +2602,7 @@
977 def test_move_to_corrupt(self):
978 tmp = TempDir()
979 fs = filestore.FileStore(tmp.dir)
980- _id = random_id()
981+ _id = random_file_id()
982 corrupt = fs.corrupt_path(_id)
983 canonical = fs.path(_id)
984
985
986=== modified file 'filestore/tests/test_misc.py'
987--- filestore/tests/test_misc.py 2013-03-01 00:02:29 +0000
988+++ filestore/tests/test_misc.py 2013-04-28 20:23:24 +0000
989@@ -33,6 +33,8 @@
990 from dbase32 import db32enc, db32dec
991 from dbase32.rfc3548 import b32enc, b32dec
992
993+from . import TempDir
994+
995 import filestore
996 from filestore.protocols import MIN_LEAF_SIZE
997 from filestore import misc, protocols
998@@ -79,6 +81,24 @@
999 self.assertEqual(len(obj['B']), 2 * MIN_LEAF_SIZE - 1)
1000 self.assertEqual(len(obj['C']), 2 * MIN_LEAF_SIZE)
1001
1002+ def test_build_leaves(self):
1003+ tmp = TempDir()
1004+ leaves = misc.build_leaves(protocols.VERSION1.leaf_size)
1005+ self.assertIsNone(misc.write_files(tmp.dir))
1006+ self.assertEqual(
1007+ sorted(os.listdir(tmp.dir)),
1008+ ['A', 'B', 'C', 'CA', 'CB', 'CC']
1009+ )
1010+ for (key, data) in leaves.items():
1011+ self.assertEqual(
1012+ open(tmp.join(key), 'rb').read(),
1013+ data
1014+ )
1015+ self.assertEqual(
1016+ open(tmp.join('C' + key), 'rb').read(),
1017+ leaves['C'] + data
1018+ )
1019+
1020 def test_tohex(self):
1021 for i in range(10):
1022 h = md5(os.urandom(16))
