Merge lp:~jderose/filestore/protocols into lp:filestore
| Status | Merged |
|---|---|
| Approved by | James Raymond |
| Approved revision | 322 |
| Merged at revision | 281 |
| Proposed branch | lp:~jderose/filestore/protocols |
| Merge into | lp:filestore |
| Diff against target | 3008 lines (+2212/-491), 10 files modified |
| To merge this branch | bzr merge lp:~jderose/filestore/protocols |
| Related bugs | |

Files modified:

* MANIFEST.in (+1/-1)
* benchmark-protocol.py (+28/-11)
* filestore/__init__.py (+48/-184)
* filestore/data/test-vectors.json (+50/-0)
* filestore/misc.py (+124/-0)
* filestore/protocols.py (+386/-0)
* filestore/tests/__init__.py (+192/-293)
* filestore/tests/test_misc.py (+150/-0)
* filestore/tests/test_protocols.py (+1223/-0)
* setup.py (+10/-2)
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| James Raymond | Approve | | |

Review via email: mp+141504@code.launchpad.net
Commit message
Description of the change
This puts the infrastructure in place for selecting between multiple protocols based on ID length, but so far it causes no external API change, and the version zero protocol is used by default. Also, not all code paths have been updated to be protocol-aware... I did just enough to prove the overall approach is viable. As this is already a rather large merge, I think it's better to finish the API updates in another merge (and probably further refine some things).
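For concreteness, here's a minimal sketch of the length-based selection this introduces (assuming this branch is installed as `filestore`; `get_protocol()` and the digest sizes are taken straight from the diff below):

```python
from filestore import get_protocol
from filestore.protocols import VERSION0, VERSION1

# base32 encodes 5 bits per character, so the digest size fixes the ID length:
#   v0: 240-bit digest -> 240 // 5 = 48-character ID
#   v1: 280-bit digest -> 280 // 5 = 56-character ID
assert (VERSION0.digest_b32len, VERSION1.digest_b32len) == (48, 56)

# Hash a tiny one-leaf file under the proposed v1 protocol:
data = b'Hello, Bruce Schneier!'
leaf_hash = VERSION1.hash_leaf(0, data)
_id = VERSION1.hash_root(len(data), leaf_hash)

# The 56-character ID alone is enough to select the right protocol:
assert len(_id) == 56
assert get_protocol(_id) is VERSION1
```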
Details:
* Moves filestore.py to filestore/__init__.py (filestore becomes a package)
* Adds new filestore/protocols.py module with the `Protocol` and `OldProtocol` classes
* Changes the proposed v1 protocol from 240 to 280 bits so we can do a nice migration and fully test our length-based protocol identification
* Adds new filestore/misc.py module with some utility functions for generating standard test vectors for a protocol version (see the sketch after this list)... before we officially commit to the v1 protocol, it would be nice to have at least one independent "clean-room" implementation based on just a description of the protocol, verified with our test vectors, and written in a language other than Python (my vote is Java because of Android)
* Adds v0 and v1 test vectors in the test-vectors.json data file
* Moves the TempFileStore class into the misc.py module; since misc.py normally isn't imported, this reduces Dmedia's memory usage a bit (the filestore package no longer needs to import `shutil` and `tempfile`)
* Removes the hash_leaf2() and hash_root2() functions and their tests, as they are now replaced by equivalent implementations in the new Protocol class
* Updates Hasher, hash_fp(), FileStore.verify(), check_root_hash(), and a few other misc functions to be protocol-version aware... this isn't a complete port yet, just enough to confirm that the Protocol API is viable
* A bit of unit test refactoring and cleanup
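To illustrate the test-vector workflow, here's a sketch (under the same installation assumption as above) that recomputes the shipped vectors and compares them against data/test-vectors.json; an independent clean-room implementation would do the same with its own hashing functions:

```python
from filestore import misc
from filestore.protocols import PROTOCOLS

stored = misc.load_test_vectors()   # parses filestore/data/test-vectors.json
for protocol in PROTOCOLS:          # (VERSION0, VERSION1)
    # Vectors are keyed by base32 ID length: "48" for v0, "56" for v1.
    computed = misc.build_test_vectors(protocol)
    assert computed == stored[str(protocol.digest_b32len)]
```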
James Raymond (jamesmr) wrote:
Long read, but looks okay to me!
Preview Diff
1 | === modified file 'MANIFEST.in' |
2 | --- MANIFEST.in 2011-08-19 20:51:05 +0000 |
3 | +++ MANIFEST.in 2012-12-31 04:35:24 +0000 |
4 | @@ -1,2 +1,2 @@ |
5 | -include COPYING test_filestore.py |
6 | +include COPYING |
7 | include doc/* |
8 | |
9 | === modified file 'benchmark-protocol.py' |
10 | --- benchmark-protocol.py 2012-12-02 00:16:59 +0000 |
11 | +++ benchmark-protocol.py 2012-12-31 04:35:24 +0000 |
12 | @@ -3,30 +3,47 @@ |
13 | import timeit |
14 | |
15 | setup = """ |
16 | -import os |
17 | from base64 import b32encode, b64encode |
18 | |
19 | -from filestore import DIGEST_BYTES, hash_root, hash_root2 |
20 | - |
21 | -digest = os.urandom(DIGEST_BYTES) |
22 | +from filestore import check_id, B32ALPHABET |
23 | +from filestore.protocols import Protocol, OldProtocol, MiB |
24 | + |
25 | +digest = b'N' * 30 |
26 | +_id = 'GTUK6VPCITPAUSG3FLVNYK7IXEPXWDKGQ4T2X4IUOYXDE232' |
27 | + |
28 | +new = Protocol(8 * MiB, 240) |
29 | +old = OldProtocol(8 * MiB, 240) |
30 | + |
31 | +leaf_index = 17 |
32 | +leaf_data = b'S' * 1776 |
33 | + |
34 | file_size = 31415 |
35 | -leaf_hashes = b'L' * DIGEST_BYTES |
36 | +leaf_hashes = b'D' * 30 |
37 | """ |
38 | |
39 | -N = 50 * 1000 |
40 | +N = 100 * 1000 |
41 | |
42 | def run(statement): |
43 | t = timeit.Timer(statement, setup) |
44 | elapsed = t.timeit(N) |
45 | - print('{:.0f}/s: {}'.format(N / elapsed, statement)) |
46 | + rate = int(N / elapsed) |
47 | + print('{:>10,}: {}'.format(rate, statement)) |
48 | + |
49 | + |
50 | +print('Executions per second:') |
51 | |
52 | run('b32encode(digest)') |
53 | -run("b32encode(digest).decode('utf-8')") |
54 | -run("b32encode(digest).decode('ascii')") |
55 | |
56 | run('b64encode(digest)') |
57 | run("b64encode(digest).decode('utf-8')") |
58 | run("b64encode(digest).decode('ascii')") |
59 | |
60 | -run('hash_root(file_size, leaf_hashes)') |
61 | -run('hash_root2(file_size, leaf_hashes)') |
62 | +run('check_id(_id)') |
63 | +run('set(_id).issubset(B32ALPHABET)') |
64 | +run('B32ALPHABET.issuperset(_id)') |
65 | + |
66 | +run("old._hash_leaf(leaf_index, leaf_data, b'')") |
67 | +run("new._hash_leaf(leaf_index, leaf_data, b'')") |
68 | + |
69 | +run('old._hash_root(file_size, leaf_hashes)') |
70 | +run('new._hash_root(file_size, leaf_hashes)') |
71 | |
72 | === added directory 'filestore' |
73 | === renamed file 'filestore.py' => 'filestore/__init__.py' |
74 | --- filestore.py 2012-12-19 08:54:54 +0000 |
75 | +++ filestore/__init__.py 2012-12-31 04:35:24 +0000 |
76 | @@ -28,7 +28,7 @@ |
77 | |
78 | The best way to experiment is to use a `TempFileStore`. For example: |
79 | |
80 | ->>> from filestore import TempFileStore |
81 | +>>> from filestore.misc import TempFileStore |
82 | >>> fs = TempFileStore() |
83 | >>> tmp_fp = fs.allocate_tmp() |
84 | >>> tmp_fp.write(b'Hello, Bruce Schneier!') |
85 | @@ -75,8 +75,6 @@ |
86 | import os |
87 | from os import path |
88 | import io |
89 | -import tempfile |
90 | -import shutil |
91 | import stat |
92 | from base64 import b32encode, b64encode |
93 | import hashlib |
94 | @@ -87,6 +85,8 @@ |
95 | |
96 | from skein import skein512 |
97 | |
98 | +from .protocols import VERSION0, VERSION1 |
99 | + |
100 | try: |
101 | from _filestore import fallocate, posix_fadvise, fastread |
102 | except ImportError: |
103 | @@ -114,6 +114,9 @@ |
104 | DIGEST_BYTES = DIGEST_BITS // 8 |
105 | DIGEST_B32LEN = DIGEST_BITS // 5 |
106 | |
107 | +ALLOWED_B32LEN = (48, 56) |
108 | +PROTOCOL = VERSION0 |
109 | + |
110 | # Handy constants for file layout: |
111 | DOTNAME = '.dmedia' |
112 | B32ALPHABET = frozenset('234567ABCDEFGHIJKLMNOPQRSTUVWXYZ') |
113 | @@ -259,124 +262,15 @@ |
114 | :param leaf_hashes: a ``bytes`` instance that is the concatenated leaf |
115 | hashes produced by `hash_leaf()` |
116 | """ |
117 | - if not isinstance(file_size, int): |
118 | - raise TypeError( |
119 | - TYPE_ERROR.format('file_size', int, type(file_size), file_size) |
120 | - ) |
121 | - if file_size < 1: |
122 | - raise ValueError( |
123 | - 'file_size: must be >= 1; got {!r}'.format(file_size) |
124 | - ) |
125 | - if not isinstance(leaf_hashes, bytes): |
126 | - raise TypeError(TYPE_ERROR.format( |
127 | - 'leaf_hashes', bytes, type(leaf_hashes), leaf_hashes |
128 | - ) |
129 | - ) |
130 | - if len(leaf_hashes) == 0: |
131 | - raise ValueError('leaf_hashes cannot be empty') |
132 | - if len(leaf_hashes) % DIGEST_BYTES != 0: |
133 | - raise ValueError('len(leaf_hashes) is {}, not multiple of {}'.format( |
134 | - len(leaf_hashes), DIGEST_BYTES) |
135 | - ) |
136 | - n = len(leaf_hashes) // DIGEST_BYTES |
137 | - low = (n - 1) * LEAF_SIZE + 1 |
138 | - high = n * LEAF_SIZE |
139 | - if not (low <= file_size <= high): |
140 | - raise ValueError( |
141 | - 'Need {} <= file_size <= {}; got {}'.format(low, high, file_size) |
142 | - ) |
143 | - skein = skein512(hash_file_size(file_size), |
144 | - digest_bits=DIGEST_BITS, |
145 | - pers=PERS_ROOT, |
146 | - ) |
147 | - skein.update(leaf_hashes) |
148 | - return b32encode(skein.digest()).decode('utf-8') |
149 | - |
150 | - |
151 | -def hash_leaf2(leaf_index, leaf_data, challenge=b''): |
152 | - """ |
153 | - Return the leaf-hash of the leaf at (zero) index *leaf_index*. |
154 | - |
155 | - For example: |
156 | - |
157 | - >>> b32encode(hash_leaf2(3, b'XYZ')) |
158 | - b'4GIMNJIAKM5NK3I26QWZFENZRM66CZB2VQK4YSXGH3KWBZCQ' |
159 | - |
160 | - :param leaf_index: an ``int`` >= 0 |
161 | - :param leaf_data: a ``bytes`` instance with contents of this leaf |
162 | - :param challenge: a ``bytes`` instance containing a random nonce to be used |
163 | - in a proof-of-storage |
164 | - """ |
165 | - if not isinstance(leaf_index, int): |
166 | - raise TypeError( |
167 | - TYPE_ERROR.format('leaf_index', int, type(leaf_index), leaf_index) |
168 | - ) |
169 | - if leaf_index < 0: |
170 | - raise ValueError( |
171 | - 'leaf_index: must be >= 0; got {!r}'.format(leaf_index) |
172 | - ) |
173 | - if not isinstance(leaf_data, bytes): |
174 | - raise TypeError( |
175 | - TYPE_ERROR.format('leaf_data', bytes, type(leaf_data), leaf_data) |
176 | - ) |
177 | - if not (1 <= len(leaf_data) <= LEAF_SIZE): |
178 | - raise ValueError('Need 1 <= len(leaf_data) <= {}; got {}'.format( |
179 | - LEAF_SIZE, len(leaf_data) |
180 | - ) |
181 | - ) |
182 | - return skein512(leaf_data, |
183 | - digest_bits=DIGEST_BITS, |
184 | - pers=PERS_LEAF, |
185 | - key=str(leaf_index).encode('utf-8'), |
186 | - nonce=challenge, |
187 | - ).digest() |
188 | - |
189 | - |
190 | -def hash_root2(file_size, leaf_hashes): |
191 | - """ |
192 | - Return the root-hash for a file *file_size* bytes with *leaf_hashes*. |
193 | - |
194 | - For example: |
195 | - |
196 | - >>> hash_root2(31415, b'NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN') |
197 | - '7M6RPEWAIMYTKIISSB3K4ORAX2ONZ6R3B4XAZWQURMGD2IW4' |
198 | - |
199 | - :param file_size: an ``int`` >= 1 |
200 | - :param leaf_hashes: a ``bytes`` instance that is the concatenated leaf |
201 | - hashes produced by `hash_leaf2()` |
202 | - """ |
203 | - if not isinstance(file_size, int): |
204 | - raise TypeError( |
205 | - TYPE_ERROR.format('file_size', int, type(file_size), file_size) |
206 | - ) |
207 | - if file_size < 1: |
208 | - raise ValueError( |
209 | - 'file_size: must be >= 1; got {!r}'.format(file_size) |
210 | - ) |
211 | - if not isinstance(leaf_hashes, bytes): |
212 | - raise TypeError(TYPE_ERROR.format( |
213 | - 'leaf_hashes', bytes, type(leaf_hashes), leaf_hashes |
214 | - ) |
215 | - ) |
216 | - if len(leaf_hashes) == 0: |
217 | - raise ValueError('leaf_hashes cannot be empty') |
218 | - if len(leaf_hashes) % DIGEST_BYTES != 0: |
219 | - raise ValueError('len(leaf_hashes) is {}, not multiple of {}'.format( |
220 | - len(leaf_hashes), DIGEST_BYTES) |
221 | - ) |
222 | - n = len(leaf_hashes) // DIGEST_BYTES |
223 | - low = (n - 1) * LEAF_SIZE + 1 |
224 | - high = n * LEAF_SIZE |
225 | - if not (low <= file_size <= high): |
226 | - raise ValueError( |
227 | - 'Need {} <= file_size <= {}; got {}'.format(low, high, file_size) |
228 | - ) |
229 | - skein = skein512(leaf_hashes, |
230 | - digest_bits=DIGEST_BITS, |
231 | - pers=PERS_ROOT, |
232 | - key=str(file_size).encode('utf-8'), |
233 | - ) |
234 | - return b32encode(skein.digest()).decode('utf-8') |
235 | + return VERSION0.hash_root(file_size, leaf_hashes) |
236 | + |
237 | + |
238 | +def get_protocol(_id): |
239 | + check_id(_id) |
240 | + if len(_id) == 48: |
241 | + return VERSION0 |
242 | + assert len(_id) == 56 |
243 | + return VERSION1 |
244 | |
245 | |
246 | class Hasher(object): |
247 | @@ -384,9 +278,10 @@ |
248 | A helper to keep track of state as you hash leaf after leaf. |
249 | """ |
250 | |
251 | - __slots__ = ('file_size', 'leaf_index', 'array', 'closed') |
252 | + __slots__ = ('protocol', 'file_size', 'leaf_index', 'array', 'closed') |
253 | |
254 | - def __init__(self): |
255 | + def __init__(self, protocol=PROTOCOL): |
256 | + self.protocol = protocol |
257 | self.file_size = 0 |
258 | self.leaf_index = 0 |
259 | self.array = bytearray() |
260 | @@ -401,9 +296,9 @@ |
261 | raise Exception('Expected leaf.index {}, got {}'.format( |
262 | self.leaf_index, leaf.index) |
263 | ) |
264 | - if len(leaf.data) < LEAF_SIZE: |
265 | + if len(leaf.data) < self.protocol.leaf_size: |
266 | self.closed = True |
267 | - leaf_hash = hash_leaf(leaf.index, leaf.data) |
268 | + leaf_hash = self.protocol.hash_leaf(leaf.index, leaf.data) |
269 | self.array.extend(leaf_hash) |
270 | self.file_size += len(leaf.data) |
271 | self.leaf_index += 1 |
272 | @@ -413,7 +308,7 @@ |
273 | self.closed = True |
274 | leaf_hashes = bytes(self.array) |
275 | return ContentHash( |
276 | - hash_root(self.file_size, leaf_hashes), |
277 | + self.protocol.hash_root(self.file_size, leaf_hashes), |
278 | self.file_size, |
279 | leaf_hashes |
280 | ) |
281 | @@ -535,12 +430,12 @@ |
282 | """ |
283 | if not isinstance(_id, str): |
284 | raise TypeError(TYPE_ERROR.format('_id', str, type(_id), _id)) |
285 | - if not (len(_id) == DIGEST_B32LEN and set(_id).issubset(B32ALPHABET)): |
286 | + if not (len(_id) in ALLOWED_B32LEN and set(_id).issubset(B32ALPHABET)): |
287 | raise IDError(_id) |
288 | return _id |
289 | |
290 | |
291 | -def check_leaf_hashes(leaf_hashes): |
292 | +def check_leaf_hashes(leaf_hashes, protocol=PROTOCOL): |
293 | """ |
294 | Verify that *leaf_hashes* is a ``bytes`` instance of correct length. |
295 | |
296 | @@ -562,7 +457,7 @@ |
297 | 'leaf_hashes', bytes, type(leaf_hashes), leaf_hashes |
298 | ) |
299 | ) |
300 | - if len(leaf_hashes) == 0 or len(leaf_hashes) % DIGEST_BYTES != 0: |
301 | + if len(leaf_hashes) == 0 or len(leaf_hashes) % protocol.digest_bytes != 0: |
302 | raise LeafHashesError(leaf_hashes) |
303 | return leaf_hashes |
304 | |
305 | @@ -590,11 +485,12 @@ |
306 | 22 |
307 | |
308 | """ |
309 | - computed = hash_root(file_size, leaf_hashes) |
310 | + protocol = get_protocol(_id) |
311 | + computed = protocol.hash_root(file_size, leaf_hashes) |
312 | if _id != computed: |
313 | raise RootHashError(_id, file_size, leaf_hashes, computed) |
314 | if unpack: |
315 | - leaf_hashes = tuple(iter_leaf_hashes(leaf_hashes)) |
316 | + leaf_hashes = tuple(iter_leaf_hashes(leaf_hashes, protocol)) |
317 | return ContentHash(_id, file_size, leaf_hashes) |
318 | |
319 | |
320 | @@ -628,7 +524,7 @@ |
321 | ################################################# |
322 | # Utility functions for working with leaf_hashes: |
323 | |
324 | -def enumerate_leaf_hashes(leaf_hashes): |
325 | +def enumerate_leaf_hashes(leaf_hashes, protocol=PROTOCOL): |
326 | """ |
327 | Enumerate *leaf_hashes*. |
328 | |
329 | @@ -640,12 +536,13 @@ |
330 | [(0, b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'), (1, b'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB')] |
331 | |
332 | """ |
333 | - check_leaf_hashes(leaf_hashes) |
334 | - for i in range(len(leaf_hashes) // DIGEST_BYTES): |
335 | - yield (i, leaf_hashes[i*DIGEST_BYTES : (i+1)*DIGEST_BYTES]) |
336 | - |
337 | - |
338 | -def iter_leaf_hashes(leaf_hashes): |
339 | + check_leaf_hashes(leaf_hashes, protocol) |
340 | + digest_bytes = protocol.digest_bytes |
341 | + for i in range(len(leaf_hashes) // digest_bytes): |
342 | + yield (i, leaf_hashes[i*digest_bytes : (i+1)*digest_bytes]) |
343 | + |
344 | + |
345 | +def iter_leaf_hashes(leaf_hashes, protocol=PROTOCOL): |
346 | """ |
347 | Iterate through *leaf_hashes*. |
348 | |
349 | @@ -657,12 +554,13 @@ |
350 | [b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', b'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'] |
351 | |
352 | """ |
353 | - check_leaf_hashes(leaf_hashes) |
354 | - for i in range(len(leaf_hashes) // DIGEST_BYTES): |
355 | - yield leaf_hashes[i*DIGEST_BYTES : (i+1)*DIGEST_BYTES] |
356 | - |
357 | - |
358 | -def get_leaf_hash(leaf_hashes, i): |
359 | + check_leaf_hashes(leaf_hashes, protocol) |
360 | + digest_bytes = protocol.digest_bytes |
361 | + for i in range(len(leaf_hashes) // digest_bytes): |
362 | + yield leaf_hashes[i*digest_bytes : (i+1)*digest_bytes] |
363 | + |
364 | + |
365 | +def get_leaf_hash(leaf_hashes, i, protocol=PROTOCOL): |
366 | """ |
367 | Return a slice containing the i-th leaf_hash from *leaf_hashes*. |
368 | |
369 | @@ -677,7 +575,8 @@ |
370 | b'' |
371 | |
372 | """ |
373 | - return leaf_hashes[i*DIGEST_BYTES : (i+1)*DIGEST_BYTES] |
374 | + digest_bytes = protocol.digest_bytes |
375 | + return leaf_hashes[i*digest_bytes : (i+1)*digest_bytes] |
376 | |
377 | |
378 | def missing_leaves(fp, leaf_hashes): |
379 | @@ -950,7 +849,7 @@ |
380 | thread.join() # Make sure batch_reader() terminates |
381 | |
382 | |
383 | -def hash_fp(src_fp, dst_fp=None): |
384 | +def hash_fp(src_fp, dst_fp=None, protocol=PROTOCOL): |
385 | """ |
386 | Compute content hash of open file *src_fp*, optionally writing *dst_fp*. |
387 | |
388 | @@ -963,7 +862,7 @@ |
389 | :param src_fp: A file opened in mode "rb" or "r+b" |
390 | :param dst_fp: An optional file opened in mode "wd" |
391 | """ |
392 | - hasher = Hasher() |
393 | + hasher = Hasher(protocol) |
394 | for leaf in reader_iter(src_fp): |
395 | hasher.hash_leaf(leaf) |
396 | if dst_fp: |
397 | @@ -1093,7 +992,7 @@ |
398 | subdir = path.join(self.basedir, 'files', prefix) |
399 | for name in sorted(os.listdir(subdir)): |
400 | _id = prefix + name |
401 | - if len(_id) != DIGEST_B32LEN: |
402 | + if len(_id) not in ALLOWED_B32LEN: |
403 | continue |
404 | if not set(_id).issubset(B32ALPHABET): |
405 | continue |
406 | @@ -1296,7 +1195,7 @@ |
407 | a ContentHash namedtuple |
408 | """ |
409 | src_fp = self.open(_id) |
410 | - c = hash_fp(src_fp) |
411 | + c = hash_fp(src_fp, protocol=get_protocol(_id)) |
412 | if c.id != _id: |
413 | raise self.move_to_corrupt(src_fp, _id, bad_id=c.id) |
414 | if return_fp: |
415 | @@ -1688,38 +1587,3 @@ |
416 | except CorruptFile as e: |
417 | yield e |
418 | |
419 | - |
420 | -class TempFileStore(FileStore): |
421 | - """ |
422 | - A throw-away `FileStore` for experimenting and testing. |
423 | - |
424 | - When you create a `TempFileStore` instance, a random, temporary |
425 | - `FileStore.parentdir` is created for you. For example: |
426 | - |
427 | - >>> fs = TempFileStore() |
428 | - |
429 | - Then when the `TempFileStore` instance is garbage collected, this |
430 | - directory is automatically deleted (along with any files contained |
431 | - therein). |
432 | - |
433 | - As with a `FileStore`, you can provide the `FileStore.id` and |
434 | - `FileStore.copies` attributes. The `FileStore` itself ignores these |
435 | - values, but they are convenient for high-level software like Dmedia. |
436 | - |
437 | - For example: |
438 | - |
439 | - >>> fs = TempFileStore('hello', 3) |
440 | - >>> fs.id |
441 | - 'hello' |
442 | - >>> fs.copies |
443 | - 3 |
444 | - |
445 | - """ |
446 | - def __init__(self, _id=None, copies=0): |
447 | - parentdir = tempfile.mkdtemp(prefix='TempFileStore.') |
448 | - super().__init__(parentdir, _id, copies) |
449 | - assert self.parentdir is parentdir |
450 | - |
451 | - def __del__(self): |
452 | - if path.isdir(self.parentdir): |
453 | - shutil.rmtree(self.parentdir) |
454 | |
455 | === added directory 'filestore/data' |
456 | === added file 'filestore/data/test-vectors.json' |
457 | --- filestore/data/test-vectors.json 1970-01-01 00:00:00 +0000 |
458 | +++ filestore/data/test-vectors.json 2012-12-31 04:35:24 +0000 |
459 | @@ -0,0 +1,50 @@ |
460 | +{ |
461 | + "48": { |
462 | + "leaf_hashes": { |
463 | + "A": [ |
464 | + "FrZmE1OIglZp4N1Rl9HD6MuAQXDGqx57KgsPyXLL", |
465 | + "DaIG8amV8Jl6u8Qa2uiKWQhmgV2r5IxlTg3Tt70l" |
466 | + ], |
467 | + "B": [ |
468 | + "atOnHDzrEkWhr7pFMIkfrPfNcNGbd+CN/c7aTVu5", |
469 | + "AsAN/81Fu8tHrfJxoj4+qVKPgFqv3pFEkxoNGCRt" |
470 | + ], |
471 | + "C": [ |
472 | + "zD0aCp/ozlcPJDNX70E0ZvgN0B3czawr6zxpyVJl", |
473 | + "YLJzcaFfa2UKCARHePkHbvgFoGf7q4Bns2jXdCgc" |
474 | + ] |
475 | + }, |
476 | + "root_hashes": { |
477 | + "A": "O4NQ4LFNI2UCMKOSWWGPRZXSNEPW2M6OOTBFM3AHTBK35D67", |
478 | + "B": "NW6IMGBAU4NTZDQRHGIZA7VYV5CBZ5TUXADHZE43N4GDZGKK", |
479 | + "C": "TRXCE6PVSJTHQX7H4KV7A7F5DKCMSETLMNANNCJIVT624WLU", |
480 | + "CA": "QSUCTH4LJU3B7QG7IJ3FC7HNE4TXDPZVQEJ36B4A45ATTXDM", |
481 | + "CB": "3FWPZ6RBPXGBSTFBNPK5IOCO7DUMSKR5YTWJ3HM2KI4AEF2O", |
482 | + "CC": "CX2K52LIUTT6SSMFHR7NUI6YIJLO3SGKLSRIL6IDY4BP7HUZ" |
483 | + } |
484 | + }, |
485 | + "56": { |
486 | + "leaf_hashes": { |
487 | + "A": [ |
488 | + "vnqPKTOknItURAuVQi5gHml2ncDDv/apR7CRbZ8Gw57L9S0=", |
489 | + "mQX/95lrNeZNnsQKj02VnNXIqgRjwGnqLoZoF9EHkmmhjjs=" |
490 | + ], |
491 | + "B": [ |
492 | + "f776qpuQhQO1EWR1nrUCIxD9+sIqTcBb2AgJ6iOK8Gq8bIg=", |
493 | + "ygruy07GH5BRvrwtO2TCGCdxGjpOG/ogF+8sytiTYihTfsw=" |
494 | + ], |
495 | + "C": [ |
496 | + "jbRklQZ8C9suPd0Ar/5vO2JVMoHEQ62ksuG1EsYtPbzr8FE=", |
497 | + "uGq34DS/oD0NT0zxWl562rYAUBbqeRZDvNtg0YDujBPzBGQ=" |
498 | + ] |
499 | + }, |
500 | + "root_hashes": { |
501 | + "A": "FWV6OJYI36C5NN5DC4GS2IGWZXFCZCGJGHK35YV62LKAG7D2Z4LO4Z2S", |
502 | + "B": "OB756PX5V32JMKJAFKIAJ4AFSFPA2WLNIK32ELNO4FJLJPEEEN6DCAAJ", |
503 | + "C": "QSOHXCDH64IQBOG2NM67XEC6MLZKKPGBTISWWRPMCFCJ2EKMA2SMLY46", |
504 | + "CA": "BQ5UTB33ML2VDTCTLVXK6N4VSMGGKKKDYKG24B6DOAFJB6NRSGMB5BNO", |
505 | + "CB": "ER3LDDZ2LHMTDLOPE5XA5GEEZ6OE45VFIFLY42GEMV4TSZ2B7GJJXAIX", |
506 | + "CC": "R6RN5KL7UBNJWR5SK5YPUKIGAOWWFMYYOVESU5DPT34X5MEK75PXXYIX" |
507 | + } |
508 | + } |
509 | +} |
510 | \ No newline at end of file |
511 | |
512 | === added file 'filestore/misc.py' |
513 | --- filestore/misc.py 1970-01-01 00:00:00 +0000 |
514 | +++ filestore/misc.py 2012-12-31 04:35:24 +0000 |
515 | @@ -0,0 +1,124 @@ |
516 | +# filestore: Dmedia hashing protocol and file layout |
517 | +# Copyright (C) 2012 Novacut Inc |
518 | +# |
519 | +# This file is part of `filestore`. |
520 | +# |
521 | +# `filestore` is free software: you can redistribute it and/or modify it under |
522 | +# the terms of the GNU Affero General Public License as published by the Free |
523 | +# Software Foundation, either version 3 of the License, or (at your option) any |
524 | +# later version. |
525 | +# |
526 | +# `filestore` is distributed in the hope that it will be useful, but WITHOUT ANY |
527 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
528 | +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
529 | +# details. |
530 | +# |
531 | +# You should have received a copy of the GNU Affero General Public License along |
532 | +# with `filestore`. If not, see <http://www.gnu.org/licenses/>. |
533 | +# |
534 | +# Authors: |
535 | +# Jason Gerard DeRose <jderose@novacut.com> |
536 | + |
537 | +""" |
538 | +Helper functions for creating and utilizing protocol test vectors. |
539 | +""" |
540 | + |
541 | +from os import path |
542 | +import json |
543 | +from base64 import b64encode, b64decode |
544 | +import tempfile |
545 | +import shutil |
546 | + |
547 | +from . import FileStore |
548 | + |
549 | + |
550 | +TEST_VECTORS = path.join( |
551 | + path.dirname(path.abspath(__file__)), 'data', 'test-vectors.json' |
552 | +) |
553 | +assert path.isfile(TEST_VECTORS) |
554 | + |
555 | + |
556 | +def encode(value): |
557 | + return b64encode(value).decode('utf-8') |
558 | + |
559 | + |
560 | +def decode(value): |
561 | + return b64decode(value.encode('utf-8')) |
562 | + |
563 | + |
564 | +def load_test_vectors(): |
565 | + return json.load(open(TEST_VECTORS, 'r')) |
566 | + |
567 | + |
568 | +def build_test_leaves(leaf_size): |
569 | + return { |
570 | + 'A': b'A', |
571 | + 'B': b'B' * (leaf_size - 1), |
572 | + 'C': b'C' * leaf_size, |
573 | + } |
574 | + |
575 | + |
576 | +def build_test_vectors(protocol): |
577 | + leaves = build_test_leaves(protocol.leaf_size) |
578 | + |
579 | + def hash_root(letters): |
580 | + leaf_hashes = b'' |
581 | + file_size = 0 |
582 | + for (i, L) in enumerate(letters): |
583 | + data = leaves[L] |
584 | + leaf_hashes += protocol.hash_leaf(i, data) |
585 | + file_size += len(data) |
586 | + return protocol.hash_root(file_size, leaf_hashes) |
587 | + |
588 | + vectors = { |
589 | + 'leaf_hashes': {}, |
590 | + 'root_hashes': {}, |
591 | + } |
592 | + |
593 | + for (key, data) in leaves.items(): |
594 | + vectors['leaf_hashes'][key] = [ |
595 | + encode(protocol.hash_leaf(i, data)) for i in range(2) |
596 | + ] |
597 | + |
598 | + for L in leaves: |
599 | + for key in (L, 'C' + L): |
600 | + vectors['root_hashes'][key] = hash_root(key) |
601 | + |
602 | + return vectors |
603 | + |
604 | + |
605 | +class TempFileStore(FileStore): |
606 | + """ |
607 | + A throw-away `FileStore` for experimenting and testing. |
608 | + |
609 | + When you create a `TempFileStore` instance, a random, temporary |
610 | + `FileStore.parentdir` is created for you. For example: |
611 | + |
612 | + >>> fs = TempFileStore() |
613 | + |
614 | + Then when the `TempFileStore` instance is garbage collected, this |
615 | + directory is automatically deleted (along with any files contained |
616 | + therein). |
617 | + |
618 | + As with a `FileStore`, you can provide the `FileStore.id` and |
619 | + `FileStore.copies` attributes. The `FileStore` itself ignores these |
620 | + values, but they are convenient for high-level software like Dmedia. |
621 | + |
622 | + For example: |
623 | + |
624 | + >>> fs = TempFileStore('hello', 3) |
625 | + >>> fs.id |
626 | + 'hello' |
627 | + >>> fs.copies |
628 | + 3 |
629 | + |
630 | + """ |
631 | + def __init__(self, _id=None, copies=0): |
632 | + parentdir = tempfile.mkdtemp(prefix='TempFileStore.') |
633 | + super().__init__(parentdir, _id, copies) |
634 | + assert self.parentdir is parentdir |
635 | + |
636 | + def __del__(self): |
637 | + if path.isdir(self.parentdir): |
638 | + shutil.rmtree(self.parentdir) |
639 | + |
640 | |
641 | === added file 'filestore/protocols.py' |
642 | --- filestore/protocols.py 1970-01-01 00:00:00 +0000 |
643 | +++ filestore/protocols.py 2012-12-31 04:35:24 +0000 |
644 | @@ -0,0 +1,386 @@ |
645 | +# filestore: Dmedia hashing protocol and file layout |
646 | +# Copyright (C) 2012 Novacut Inc |
647 | +# |
648 | +# This file is part of `filestore`. |
649 | +# |
650 | +# `filestore` is free software: you can redistribute it and/or modify it under |
651 | +# the terms of the GNU Affero General Public License as published by the Free |
652 | +# Software Foundation, either version 3 of the License, or (at your option) any |
653 | +# later version. |
654 | +# |
655 | +# `filestore` is distributed in the hope that it will be useful, but WITHOUT ANY |
656 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
657 | +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
658 | +# details. |
659 | +# |
660 | +# You should have received a copy of the GNU Affero General Public License along |
661 | +# with `filestore`. If not, see <http://www.gnu.org/licenses/>. |
662 | +# |
663 | +# Authors: |
664 | +# Jason Gerard DeRose <jderose@novacut.com> |
665 | + |
666 | +""" |
667 | +Generic API for the Dmedia Hashing Protocol, both current and future versions. |
668 | + |
669 | +Dmedia is a very loosely coupled distributed object store, and in order for it |
670 | +to achieve its goals, two key constraints were needed: |
671 | + |
672 | + 1. Files in Dmedia are read-only (you can't modify an existing file, you can |
673 | + only insert an entire new file) |
674 | + |
675 | + 2. Files can only be addressed by their content-hash |
676 | + |
677 | +We feel the properties we get are well worth these limitations, but assigning |
678 | +global file IDs based on their content hash is not without peril. For some |
679 | +background on the challenges and limitations of content-based addressing, see |
680 | +this excellent paper by Valerie Aurora: |
681 | + |
682 | + http://valerieaurora.org/monkey.html |
683 | + |
684 | +Our take-away from the above paper is that: |
685 | + |
686 | + * We should assume the hashing protocol will have a limited useful |
687 | + lifetime, perhaps not longer than 10 years or so |
688 | + |
689 | + * We should today have in place exact plans for how a new version of the |
690 | + hashing protocol will be introduced and how multiple versions of the |
691 | + protocol can coexist side-by-side during a long transition period |
692 | + |
693 | +In short, our migration strategy is to use the digest length as a way to select |
694 | +the appropriate protocol version. For example, say the version 1 hashing |
695 | +protocol has a 280-bit digest. And then in a decade from now when a new |
696 | +protocol is needed, the new protocol will have a different digest length, say |
697 | +320-bit. |
698 | + |
699 | +This allows files hashed under different versions of the protocol to be stored |
700 | +together in a FileStore. At any given time there will be one or more valid |
701 | +digest lengths, each one selecting a specific protocol version. So if you |
702 | +wanted to verify every file in a FileStore, you would iterate through them one |
703 | +by one, select the protocol version based on the ID length, and then see if the |
704 | +hash you compute matches the ID. |
705 | + |
706 | +So that our reference implementation is well architected for the inevitable, |
707 | +we're going to make all relevant code paths select the protocol version based |
708 | +on the ID length, even when there is only one protocol available. |
709 | + |
710 | +We want to practice now, rather than later. This will force us to work within |
711 | +this constraint, and will mean we develop design patterns that are prepared for |
712 | +the future. |
713 | + |
714 | +We might also do a full practice migration from the current protocol in use |
715 | +(which we're calling version zero), to the proposed version one protocol. |
716 | + |
717 | +Although we've never made any official commitment to the version zero protocol, |
718 | +it's been around long enough that we really need at least migration |
719 | +functionality, if not full support for it alongside the version one protocol. |
720 | +Despite not being production ready, some people have built fairly large Dmedia |
721 | +libraries already. |
722 | + |
723 | +For reference, the version zero protocol has been unchanged since November 2011: |
724 | + |
725 | + https://launchpad.net/filestore/trunk/11.09 |
726 | + |
727 | +In hindsight, we should have used a smaller digest size than expected for the |
728 | +version zero protocol, and then planned to do a practice protocol change by at |
729 | +least upping the digest size, even if no other changes were made. |
730 | + |
731 | +In our opinion, a 240-bit digest size is probably the best choice for the |
732 | +version one protocol, but unfortunately we can't use that if we want to put |
733 | +Dmedia through a realistic migration scenario. And the valuable lessons we'd |
734 | +learn from such a practice scenario probably outweigh the downside of not having |
735 | +what we currently feel is the ideal digest size. |
736 | + |
737 | +But even if multiple protocols are only supported in the context of unit tests, |
738 | +we still feel this is a very important architecture to embrace now. |
739 | +""" |
740 | + |
741 | +from base64 import b32encode |
742 | + |
743 | +from skein import skein512 |
744 | + |
745 | + |
746 | +# Skein personalization strings used in the Dmedia Hashing Protocol: |
747 | +PERS_LEAF = b'20110430 jderose@novacut.com dmedia/leaf' |
748 | +PERS_ROOT = b'20110430 jderose@novacut.com dmedia/root' |
749 | + |
750 | +# Additional Skein personalization strings used only in the old protocol: |
751 | +PERS_LEAF_INDEX = b'20110430 jderose@novacut.com dmedia/leaf-index' |
752 | +PERS_FILE_SIZE = b'20110430 jderose@novacut.com dmedia/file-size' |
753 | + |
754 | +# Provide very clear TypeError messages: |
755 | +TYPE_ERROR = '{}: need a {!r}; got a {!r}: {!r}' |
756 | + |
757 | +# Handy: |
758 | +MiB = 1024 ** 2 |
759 | + |
760 | + |
761 | +class Protocol: |
762 | + """ |
763 | + Standard API for current and future versions of the Dmedia Hashing Protocol. |
764 | + |
765 | + This class provides a standard API that abstracts away the details of a |
766 | + particular hashing protocol and allows multiple protocol versions to be |
767 | + supported simultaneously. |
768 | + |
769 | + The API consists of two methods: |
770 | + |
771 | + `Protocol.hash_leaf(leaf_index, leaf_data, challenge=b'')` |
772 | + |
773 | + `Protocol.hash_root(file_size, leaf_hashes)` |
774 | + |
775 | + And four attributes: |
776 | + |
777 | + `Protocol.leaf_size` |
778 | + |
779 | + `Protocol.digest_bits` |
780 | + |
781 | + `Protocol.digest_bytes` |
782 | + |
783 | + `Protocol.digest_b32len` |
784 | + |
785 | + To make it easy to experiment with different *leaf_size* and *digest_bits* |
786 | + (while still using the standard hashing functions), you can simply create |
787 | + a `Protocol` instance like this: |
788 | + |
789 | + >>> experimental = Protocol(16 * MiB, 360) |
790 | + |
791 | + The `digest_bytes` and `digest_b32len` will be derived from the provided |
792 | + *digest_bits*: |
793 | + |
794 | + >>> experimental.digest_bytes |
795 | + 45 |
796 | + >>> experimental.digest_b32len |
797 | + 72 |
798 | + |
799 | + In order to implement a protocol that uses different underlying hash |
800 | + functions, you'll need to subclass `Protocol` and override the two |
801 | + low-level hashing methods: |
802 | + |
803 | + `Protocol._hash_leaf(leaf_index, leaf_data, challenge)` |
804 | + |
805 | + `Protocol._hash_root(file_size, leaf_hashes)` |
806 | + """ |
807 | + |
808 | + def __init__(self, leaf_size, digest_bits): |
809 | + """ |
810 | + Initialize a new `Protocol` instance. |
811 | + |
812 | + :param leaf_size: an ``int`` which is some multiple of MiB and >= MiB |
813 | + :param digest_bits: an ``int`` which is some multiple of 40 and >= 200 |
814 | + """ |
815 | + if not isinstance(leaf_size, int): |
816 | + raise TypeError( |
817 | + TYPE_ERROR.format('leaf_size', int, type(leaf_size), leaf_size) |
818 | + ) |
819 | + if leaf_size < MiB: |
820 | + raise ValueError( |
821 | + 'leaf_size: must be >= MiB; got {!r}'.format(leaf_size) |
822 | + ) |
823 | + if leaf_size % MiB != 0: |
824 | + raise ValueError( |
825 | + 'leaf_size: must be multiple of MiB; got {!r}'.format(leaf_size) |
826 | + ) |
827 | + if not isinstance(digest_bits, int): |
828 | + raise TypeError( |
829 | + TYPE_ERROR.format( |
830 | + 'digest_bits', int, type(digest_bits), digest_bits |
831 | + ) |
832 | + ) |
833 | + if digest_bits < 200: |
834 | + raise ValueError( |
835 | + 'digest_bits: must be >= 200; got {!r}'.format(digest_bits) |
836 | + ) |
837 | + if digest_bits % 40 != 0: |
838 | + raise ValueError( |
839 | + 'digest_bits: must be multiple of 40; got {!r}'.format( |
840 | + digest_bits |
841 | + ) |
842 | + ) |
843 | + self.leaf_size = leaf_size |
844 | + self.digest_bits = digest_bits |
845 | + self.digest_bytes = digest_bits // 8 |
846 | + self.digest_b32len = digest_bits // 5 |
847 | + |
848 | + def hash_leaf(self, leaf_index, leaf_data, challenge=b''): |
849 | + """ |
850 | + Hash the leaf at (zero) index *leaf_index* containing *leaf_data*. |
851 | + |
852 | + For example: |
853 | + |
854 | + >>> p = Protocol(MiB, 200) |
855 | + >>> b32encode(p.hash_leaf(0, b'The Leaf Data')) |
856 | + b'EFBXKNNDAMRFIIPTIL2DYZ36YYQDAX54AAOYA7WG' |
857 | + |
858 | + The optional *challenge* keyword argument should be a random nonce used |
859 | + to make a remote cloud storage service prove that they actually have |
860 | + the complete, exact file stored. For example: |
861 | + |
862 | + >>> b32encode(p.hash_leaf(0, b'The Leaf Data', b'Random Nonce')) |
863 | + b'LBAECE4GW3UFXREWAFBMBGV2O4GISUTJ3ALZXGJW' |
864 | + |
865 | + :param leaf_index: an ``int`` >= 0 |
866 | + :param leaf_data: a ``bytes`` instance with leaf content |
867 | + :param challenge: a ``bytes`` instance containing a random nonce to be |
868 | + used in a proof-of-storage challenge; default is ``b''`` |
869 | + """ |
870 | + if not isinstance(leaf_index, int): |
871 | + raise TypeError( |
872 | + TYPE_ERROR.format( |
873 | + 'leaf_index', int, type(leaf_index), leaf_index |
874 | + ) |
875 | + ) |
876 | + if leaf_index < 0: |
877 | + raise ValueError( |
878 | + 'leaf_index: must be >= 0; got {!r}'.format(leaf_index) |
879 | + ) |
880 | + if not isinstance(leaf_data, bytes): |
881 | + raise TypeError( |
882 | + TYPE_ERROR.format( |
883 | + 'leaf_data', bytes, type(leaf_data), leaf_data |
884 | + ) |
885 | + ) |
886 | + if not (1 <= len(leaf_data) <= self.leaf_size): |
887 | + raise ValueError('Need 1 <= len(leaf_data) <= {}; got {}'.format( |
888 | + self.leaf_size, len(leaf_data) |
889 | + ) |
890 | + ) |
891 | + digest = self._hash_leaf(leaf_index, leaf_data, challenge) |
892 | + assert len(digest) == self.digest_bytes |
893 | + return digest |
894 | + |
895 | + def hash_root(self, file_size, leaf_hashes): |
896 | + """ |
897 | + Return the root-hash for a file of *file_size* bytes with *leaf_hashes*. |
898 | + |
899 | + For example: |
900 | + |
901 | + >>> p = Protocol(MiB, 200) |
902 | + >>> p.hash_root(1776, b'DDDDDDDDDDDDDDDDDDDDDDDDD') |
903 | + 'SJGXSO43OF5424G6HAEG55X2SFCZRW7L7LG42UKN' |
904 | + |
905 | + :param file_size: an ``int`` >= 1 |
906 | + :param leaf_hashes: a ``bytes`` instance containing the concatenated |
907 | + leaf-hashes produced by `Protocol.hash_leaf()`. |
908 | + """ |
909 | + if not isinstance(file_size, int): |
910 | + raise TypeError( |
911 | + TYPE_ERROR.format('file_size', int, type(file_size), file_size) |
912 | + ) |
913 | + if file_size < 1: |
914 | + raise ValueError( |
915 | + 'file_size: must be >= 1; got {!r}'.format(file_size) |
916 | + ) |
917 | + if not isinstance(leaf_hashes, bytes): |
918 | + raise TypeError(TYPE_ERROR.format( |
919 | + 'leaf_hashes', bytes, type(leaf_hashes), leaf_hashes |
920 | + ) |
921 | + ) |
922 | + if len(leaf_hashes) == 0: |
923 | + raise ValueError('leaf_hashes cannot be empty') |
924 | + if len(leaf_hashes) % self.digest_bytes != 0: |
925 | + raise ValueError( |
926 | + 'len(leaf_hashes) is {}, not multiple of {}'.format( |
927 | + len(leaf_hashes), self.digest_bytes |
928 | + ) |
929 | + ) |
930 | + n = len(leaf_hashes) // self.digest_bytes |
931 | + low = (n - 1) * self.leaf_size + 1 |
932 | + high = n * self.leaf_size |
933 | + if not (low <= file_size <= high): |
934 | + raise ValueError( |
935 | + 'Need {} <= file_size <= {}; got {}'.format( |
936 | + low, high, file_size |
937 | + ) |
938 | + ) |
939 | + digest = self._hash_root(file_size, leaf_hashes) |
940 | + assert len(digest) == self.digest_bytes |
941 | + b32digest = b32encode(digest).decode('utf-8') |
942 | + assert len(b32digest) == self.digest_b32len |
943 | + return b32digest |
944 | + |
945 | + def _hash_leaf(self, leaf_index, leaf_data, challenge): |
946 | + """ |
947 | + Protocol version 1 leaf-hashing implementation. |
948 | + |
949 | + Subclasses can override this to use different hash functions, |
950 | + configurations, etc. |
951 | + """ |
952 | + assert leaf_index >= 0 |
953 | + assert 1 <= len(leaf_data) <= self.leaf_size |
954 | + return skein512(leaf_data, |
955 | + digest_bits=self.digest_bits, |
956 | + pers=PERS_LEAF, |
957 | + key=str(leaf_index).encode('utf-8'), |
958 | + nonce=challenge, |
959 | + ).digest() |
960 | + |
961 | + def _hash_root(self, file_size, leaf_hashes): |
962 | + """ |
963 | + Protocol version 1 root-hashing implementation. |
964 | + |
965 | + Subclasses can override this to use different hash functions, |
966 | + configurations, etc. |
967 | + """ |
968 | + assert file_size >= 1 |
969 | + assert len(leaf_hashes) > 0 |
970 | + assert len(leaf_hashes) % self.digest_bytes == 0 |
971 | + return skein512(leaf_hashes, |
972 | + digest_bits=self.digest_bits, |
973 | + pers=PERS_ROOT, |
974 | + key=str(file_size).encode('utf-8'), |
975 | + ).digest() |
976 | + |
977 | + |
978 | +VERSION1 = Protocol(8 * MiB, 280) |
979 | + |
980 | + |
981 | +class OldProtocol(Protocol): |
982 | + """ |
983 | + The unofficial version zero protocol. |
984 | + |
985 | + Although no formal support commitments were ever made for this version zero |
986 | + protocol, it has been used enough in the wild that we should really |
987 | + continue to support it and provide a nice migration to the version one |
988 | + protocol. |
989 | + """ |
990 | + |
991 | + def _hash_leaf_index(self, leaf_index): |
992 | + assert leaf_index >= 0 |
993 | + return skein512(str(leaf_index).encode('utf-8'), |
994 | + digest_bits=self.digest_bits, |
995 | + pers=PERS_LEAF_INDEX, |
996 | + ).digest() |
997 | + |
998 | + def _hash_leaf(self, leaf_index, leaf_data, challenge): |
999 | + assert leaf_index >= 0 |
1000 | + assert 1 <= len(leaf_data) <= self.leaf_size |
1001 | + skein = skein512(self._hash_leaf_index(leaf_index), |
1002 | + digest_bits=self.digest_bits, |
1003 | + pers=PERS_LEAF, |
1004 | + nonce=challenge, |
1005 | + ) |
1006 | + skein.update(leaf_data) |
1007 | + return skein.digest() |
1008 | + |
1009 | + def _hash_file_size(self, file_size): |
1010 | + assert file_size >= 1 |
1011 | + return skein512(str(file_size).encode('utf-8'), |
1012 | + digest_bits=self.digest_bits, |
1013 | + pers=PERS_FILE_SIZE, |
1014 | + ).digest() |
1015 | + |
1016 | + def _hash_root(self, file_size, leaf_hashes): |
1017 | + assert file_size >= 1 |
1018 | + assert len(leaf_hashes) > 0 |
1019 | + assert len(leaf_hashes) % self.digest_bytes == 0 |
1020 | + skein = skein512(self._hash_file_size(file_size), |
1021 | + digest_bits=self.digest_bits, |
1022 | + pers=PERS_ROOT, |
1023 | + ) |
1024 | + skein.update(leaf_hashes) |
1025 | + return skein.digest() |
1026 | + |
1027 | + |
1028 | +VERSION0 = OldProtocol(8 * MiB, 240) |
1029 | + |
1030 | +PROTOCOLS = (VERSION0, VERSION1) |
1031 | |
1032 | === added directory 'filestore/tests' |
1033 | === renamed file 'test_filestore.py' => 'filestore/tests/__init__.py' |
1034 | --- test_filestore.py 2012-12-11 12:48:03 +0000 |
1035 | +++ filestore/tests/__init__.py 2012-12-31 04:35:24 +0000 |
1036 | @@ -36,6 +36,8 @@ |
1037 | |
1038 | from skein import skein512 |
1039 | |
1040 | +from filestore.protocols import PROTOCOLS, VERSION0, VERSION1 |
1041 | +from filestore import misc |
1042 | import filestore |
1043 | |
1044 | |
1045 | @@ -595,242 +597,6 @@ |
1046 | ) |
1047 | self.assertEqual(len(accum), count) |
1048 | |
1049 | - def test_hash_leaf2(self): |
1050 | - # Test with wrong leaf_index type: |
1051 | - with self.assertRaises(TypeError) as cm: |
1052 | - filestore.hash_leaf2(0.0, None) |
1053 | - self.assertEqual( |
1054 | - str(cm.exception), |
1055 | - TYPE_ERROR.format('leaf_index', int, float, 0.0) |
1056 | - ) |
1057 | - with self.assertRaises(TypeError) as cm: |
1058 | - filestore.hash_leaf2(1.5, None) |
1059 | - self.assertEqual( |
1060 | - str(cm.exception), |
1061 | - TYPE_ERROR.format('leaf_index', int, float, 1.5) |
1062 | - ) |
1063 | - |
1064 | - # Test with wrong leaf_index value |
1065 | - with self.assertRaises(ValueError) as cm: |
1066 | - filestore.hash_leaf2(-1, None) |
1067 | - self.assertEqual( |
1068 | - str(cm.exception), |
1069 | - 'leaf_index: must be >= 0; got -1' |
1070 | - ) |
1071 | - with self.assertRaises(ValueError) as cm: |
1072 | - filestore.hash_leaf2(-17, None) |
1073 | - self.assertEqual( |
1074 | - str(cm.exception), |
1075 | - 'leaf_index: must be >= 0; got -17' |
1076 | - ) |
1077 | - |
1078 | - # Test with wrong leaf_data type |
1079 | - leaf_data = 'a' * 31415 |
1080 | - with self.assertRaises(TypeError) as cm: |
1081 | - filestore.hash_leaf2(2, leaf_data) |
1082 | - self.assertEqual( |
1083 | - str(cm.exception), |
1084 | - TYPE_ERROR.format('leaf_data', bytes, str, leaf_data) |
1085 | - ) |
1086 | - |
1087 | - # Test with wrong leaf_data length |
1088 | - toosmall = b'' |
1089 | - with self.assertRaises(ValueError) as cm: |
1090 | - filestore.hash_leaf2(2, toosmall) |
1091 | - self.assertEqual( |
1092 | - str(cm.exception), |
1093 | - 'Need 1 <= len(leaf_data) <= 8388608; got 0' |
1094 | - ) |
1095 | - toobig = b'a' * 8388609 |
1096 | - with self.assertRaises(ValueError) as cm: |
1097 | - filestore.hash_leaf2(2, toobig) |
1098 | - self.assertEqual( |
1099 | - str(cm.exception), |
1100 | - 'Need 1 <= len(leaf_data) <= 8388608; got 8388609' |
1101 | - ) |
1102 | - |
1103 | - # Test with good values: |
1104 | - leaf_data = b'D' |
1105 | - digest = filestore.hash_leaf2(0, leaf_data) |
1106 | - self.assertEqual( |
1107 | - digest, |
1108 | - skein512(leaf_data, |
1109 | - digest_bits=filestore.DIGEST_BITS, |
1110 | - pers=filestore.PERS_LEAF, |
1111 | - key=b'0', |
1112 | - ).digest() |
1113 | - ) |
1114 | - leaf_data = b'D' * filestore.LEAF_SIZE |
1115 | - digest = filestore.hash_leaf2(0, leaf_data) |
1116 | - self.assertEqual( |
1117 | - digest, |
1118 | - skein512(leaf_data, |
1119 | - digest_bits=filestore.DIGEST_BITS, |
1120 | - pers=filestore.PERS_LEAF, |
1121 | - key=b'0', |
1122 | - ).digest() |
1123 | - ) |
1124 | - |
1125 | - # Test with challenge: |
1126 | - challenge = os.urandom(16) |
1127 | - leaf_data = b'D' |
1128 | - digest = filestore.hash_leaf2(0, leaf_data, challenge) |
1129 | - self.assertEqual( |
1130 | - digest, |
1131 | - skein512(leaf_data, |
1132 | - digest_bits=filestore.DIGEST_BITS, |
1133 | - pers=filestore.PERS_LEAF, |
1134 | - key=b'0', |
1135 | - nonce=challenge, |
1136 | - ).digest() |
1137 | - ) |
1138 | - leaf_data = b'D' * filestore.LEAF_SIZE |
1139 | - digest = filestore.hash_leaf2(0, leaf_data, challenge) |
1140 | - self.assertEqual( |
1141 | - digest, |
1142 | - skein512(leaf_data, |
1143 | - digest_bits=filestore.DIGEST_BITS, |
1144 | - pers=filestore.PERS_LEAF, |
1145 | - key=b'0', |
1146 | - nonce=challenge, |
1147 | - ).digest() |
1148 | - ) |
1149 | - |
1150 | - # A 25k value sanity check on our crytographic claim that the |
1151 | - # leaf_index is tied to the leaf_hash: |
1152 | - count = 25 * 1000 |
1153 | - leaf_data = os.urandom(16) |
1154 | - accum = set( |
1155 | - filestore.hash_leaf2(i, leaf_data) |
1156 | - for i in range(count) |
1157 | - ) |
1158 | - self.assertEqual(len(accum), count) |
1159 | - |
1160 | - # A 25k random value sanity check on our crytographic claim that the |
1161 | - # leaf_data is tied to the leaf_hash: |
1162 | - accum = set( |
1163 | - filestore.hash_leaf2(21, os.urandom(16)) |
1164 | - for i in range(count) |
1165 | - ) |
1166 | - self.assertEqual(len(accum), count) |
1167 | - |
1168 | - # A 25k random value sanity check on our crytographic claim that the |
1169 | - # challenge is tied to the leaf_hash: |
1170 | - accum = set( |
1171 | - filestore.hash_leaf2(21, leaf_data, os.urandom(16)) |
1172 | - for i in range(count) |
1173 | - ) |
1174 | - self.assertEqual(len(accum), count) |
1175 | - |
1176 | - def test_hash_root2(self): |
1177 | - # Test with wrong file_size type: |
1178 | - with self.assertRaises(TypeError) as cm: |
1179 | - filestore.hash_root2(1.0, None) |
1180 | - self.assertEqual( |
1181 | - str(cm.exception), |
1182 | - TYPE_ERROR.format('file_size', int, float, 1.0) |
1183 | - ) |
1184 | - with self.assertRaises(TypeError) as cm: |
1185 | - filestore.hash_root2(1.5, None) |
1186 | - self.assertEqual( |
1187 | - str(cm.exception), |
1188 | - TYPE_ERROR.format('file_size', int, float, 1.5) |
1189 | - ) |
1190 | - |
1191 | - # Test with bad file_size value |
1192 | - with self.assertRaises(ValueError) as cm: |
1193 | - filestore.hash_root2(0, None) |
1194 | - self.assertEqual( |
1195 | - str(cm.exception), |
1196 | - 'file_size: must be >= 1; got 0' |
1197 | - ) |
1198 | - with self.assertRaises(ValueError) as cm: |
1199 | - filestore.hash_root2(-1, None) |
1200 | - self.assertEqual( |
1201 | - str(cm.exception), |
1202 | - 'file_size: must be >= 1; got -1' |
1203 | - ) |
1204 | - |
1205 | - # Test with wrong leaf_hashes type: |
1206 | - leaf_hashes = 'A' * 30 |
1207 | - with self.assertRaises(TypeError) as cm: |
1208 | - filestore.hash_root2(17, leaf_hashes) |
1209 | - self.assertEqual( |
1210 | - str(cm.exception), |
1211 | - TYPE_ERROR.format('leaf_hashes', bytes, str, leaf_hashes) |
1212 | - ) |
1213 | - |
1214 | - # Test with empty leaf_hashes |
1215 | - with self.assertRaises(ValueError) as cm: |
1216 | - filestore.hash_root2(17, b'') |
1217 | - self.assertEqual(str(cm.exception), 'leaf_hashes cannot be empty') |
1218 | - |
1219 | - # Test when len(leaf_hashes) is not a multiple of DIGEST_BYTES |
1220 | - with self.assertRaises(ValueError) as cm: |
1221 | - filestore.hash_root2(17, b'A' * 45) |
1222 | - self.assertEqual( |
1223 | - str(cm.exception), |
1224 | - 'len(leaf_hashes) is 45, not multiple of 30' |
1225 | - ) |
1226 | - |
1227 | - # Test low and high bounds |
1228 | - with self.assertRaises(ValueError) as cm: |
1229 | - filestore.hash_root2( |
1230 | - filestore.LEAF_SIZE + 1, |
1231 | - b'a' * filestore.DIGEST_BYTES |
1232 | - ) |
1233 | - self.assertEqual( |
1234 | - str(cm.exception), |
1235 | - 'Need 1 <= file_size <= 8388608; got 8388609' |
1236 | - ) |
1237 | - with self.assertRaises(ValueError) as cm: |
1238 | - filestore.hash_root2( |
1239 | - filestore.LEAF_SIZE, |
1240 | - b'ab' * filestore.DIGEST_BYTES |
1241 | - ) |
1242 | - self.assertEqual( |
1243 | - str(cm.exception), |
1244 | - 'Need 8388609 <= file_size <= 16777216; got 8388608' |
1245 | - ) |
1246 | - with self.assertRaises(ValueError) as cm: |
1247 | - filestore.hash_root2( |
1248 | - 2 * filestore.LEAF_SIZE + 1, |
1249 | - b'ab' * filestore.DIGEST_BYTES |
1250 | - ) |
1251 | - self.assertEqual( |
1252 | - str(cm.exception), |
1253 | - 'Need 8388609 <= file_size <= 16777216; got 16777217' |
1254 | - ) |
1255 | - |
1256 | - # Test with good file_size and leaf_hashes |
1257 | - leaf_hashes = b'D' * filestore.DIGEST_BYTES |
1258 | - self.assertEqual( |
1259 | - filestore.hash_root2(1, leaf_hashes), |
1260 | - 'AGXSQ2KPM7IZTYFHGPOHPN5TKJV6A2S54RPH7GRZONKNOJLS' |
1261 | - ) |
1262 | - self.assertEqual( |
1263 | - filestore.hash_root2(filestore.LEAF_SIZE, leaf_hashes), |
1264 | - 'QSCEN5L4DVJBKNNLC34B7YQAQAHY43FNKWVD2ULQHRG3SUQ5' |
1265 | - ) |
1266 | - |
1267 | - # A 25k value sanity check on our crytographic claim that the |
1268 | - # file_size is tied to the root_hash: |
1269 | - count = 25 * 1000 |
1270 | - leaf_hashes = os.urandom(filestore.DIGEST_BYTES) |
1271 | - accum = set( |
1272 | - filestore.hash_root2(size, leaf_hashes) |
1273 | - for size in range(1, count + 1) |
1274 | - ) |
1275 | - self.assertEqual(len(accum), count) |
1276 | - |
1277 | - # A 25k random value sanity check on our crytographic claim that the |
1278 | - # leaf_hashes are tied to the root_hash: |
1279 | - accum = set( |
1280 | - filestore.hash_root2(314159, os.urandom(filestore.DIGEST_BYTES)) |
1281 | - for i in range(count) |
1282 | - ) |
1283 | - self.assertEqual(len(accum), count) |
1284 | - |
1285 | def test_check_leaf_hashes(self): |
1286 | # Test with wrong type: |
1287 | leaf_hashes = 'a' * 60 |
1288 | @@ -853,16 +619,44 @@ |
1289 | filestore.check_leaf_hashes(leaf_hashes) |
1290 | self.assertIs(cm.exception.leaf_hashes, leaf_hashes) |
1291 | |
1292 | - # Test with good values |
1293 | - good = b'a' * 60 |
1294 | - self.assertIs(filestore.check_leaf_hashes(good), good) |
1295 | - |
1296 | - array = bytearray() |
1297 | - for x in range(256): |
1298 | - array.extend(x for i in range(30)) |
1299 | - big = bytes(array) |
1300 | - self.assertEqual(len(big), 30 * 256) |
1301 | - self.assertIs(filestore.check_leaf_hashes(big), big) |
1302 | + # Test with good values: |
1303 | + good30 = os.urandom(30) |
1304 | + self.assertIs(filestore.check_leaf_hashes(good30), good30) |
1305 | + self.assertIs( |
1306 | + filestore.check_leaf_hashes(good30, protocol=VERSION0), |
1307 | + good30 |
1308 | + ) |
1309 | + good35 = os.urandom(35) |
1310 | + self.assertIs( |
1311 | + filestore.check_leaf_hashes(good35, protocol=VERSION1), |
1312 | + good35 |
1313 | + ) |
1314 | + |
1315 | + # Test with wrong protocol version: |
1316 | + with self.assertRaises(filestore.LeafHashesError) as cm: |
1317 | + filestore.check_leaf_hashes(good30, protocol=VERSION1) |
1318 | + self.assertIs(cm.exception.leaf_hashes, good30) |
1319 | + with self.assertRaises(filestore.LeafHashesError) as cm: |
1320 | + filestore.check_leaf_hashes(good35, protocol=VERSION0) |
1321 | + self.assertIs(cm.exception.leaf_hashes, good35) |
1322 | + |
1323 | + # Future proofing: |
1324 | + for protocol in PROTOCOLS: |
1325 | + good = os.urandom(protocol.digest_bytes) |
1326 | + self.assertIs(filestore.check_leaf_hashes(good, protocol), good) |
1327 | + |
1328 | + good = os.urandom(18 * protocol.digest_bytes) |
1329 | + self.assertIs(filestore.check_leaf_hashes(good, protocol), good) |
1330 | + |
1331 | + bad = os.urandom(protocol.digest_bytes + 1) |
1332 | + with self.assertRaises(filestore.LeafHashesError) as cm: |
1333 | + filestore.check_leaf_hashes(bad, protocol) |
1334 | + self.assertIs(cm.exception.leaf_hashes, bad) |
1335 | + |
1336 | + bad = os.urandom(18 * protocol.digest_bytes - 1) |
1337 | + with self.assertRaises(filestore.LeafHashesError) as cm: |
1338 | + filestore.check_leaf_hashes(bad, protocol) |
1339 | + self.assertIs(cm.exception.leaf_hashes, bad) |
1340 | |
1341 | def test_check_root_hash(self): |
1342 | f = filestore.check_root_hash |
1343 | @@ -893,6 +687,62 @@ |
1344 | 'OYESBWEZ4Y2AGSLMNZB4ZF75A2VG7NXVB4R25SSMRGXLN4CR' |
1345 | ) |
1346 | |
1347 | + def test_check_root_hash2(self): |
1348 | + for protocol in PROTOCOLS: |
1349 | + file_size = protocol.leaf_size + 1 |
1350 | + leaf0_hash = os.urandom(protocol.digest_bytes) |
1351 | + leaf1_hash = os.urandom(protocol.digest_bytes) |
1352 | + leaf_hashes = leaf0_hash + leaf1_hash |
1353 | + _id = protocol.hash_root(file_size, leaf_hashes) |
1354 | + |
1355 | + # Everything is correct: |
1356 | + ch = filestore.check_root_hash(_id, file_size, leaf_hashes) |
1357 | + self.assertIsInstance(ch, filestore.ContentHash) |
1358 | + self.assertEqual(ch.id, _id) |
1359 | + self.assertEqual(ch.file_size, file_size) |
1360 | + self.assertEqual(ch.leaf_hashes, leaf_hashes) |
1361 | + self.assertEqual(ch, (_id, file_size, leaf_hashes)) |
1362 | + |
1363 | + # Everything is correct, unpack=True |
1364 | + ch = filestore.check_root_hash(_id, file_size, leaf_hashes, |
1365 | + unpack=True |
1366 | + ) |
1367 | + self.assertIsInstance(ch, filestore.ContentHash) |
1368 | + self.assertEqual(ch.id, _id) |
1369 | + self.assertEqual(ch.file_size, file_size) |
1370 | + self.assertEqual(ch.leaf_hashes, (leaf0_hash, leaf1_hash)) |
1371 | + self.assertEqual(ch, (_id, file_size, (leaf0_hash, leaf1_hash))) |
1372 | + |
1373 | + # Wrong ID: |
1374 | + bad_id = random_id(protocol.digest_bytes) |
1375 | + with self.assertRaises(filestore.RootHashError) as cm: |
1376 | + filestore.check_root_hash(bad_id, file_size, leaf_hashes) |
1377 | + self.assertEqual(cm.exception.id, bad_id) |
1378 | + self.assertEqual(cm.exception.file_size, file_size) |
1379 | + self.assertEqual(cm.exception.leaf_hashes, leaf_hashes) |
1380 | + self.assertEqual(cm.exception.bad_id, _id) |
1381 | + |
1382 | + # Wrong file_size: |
1383 | + with self.assertRaises(filestore.RootHashError) as cm: |
1384 | + filestore.check_root_hash(_id, file_size + 1, leaf_hashes) |
1385 | + self.assertEqual(cm.exception.id, _id) |
1386 | + self.assertEqual(cm.exception.file_size, file_size + 1) |
1387 | + self.assertEqual(cm.exception.leaf_hashes, leaf_hashes) |
1388 | + self.assertEqual(cm.exception.bad_id, |
1389 | + protocol.hash_root(file_size + 1, leaf_hashes) |
1390 | + ) |
1391 | + |
1392 | + # Wrong leaf_hashes |
1393 | + bad_leaf_hashes = leaf0_hash + os.urandom(protocol.digest_bytes) |
1394 | + with self.assertRaises(filestore.RootHashError) as cm: |
1395 | + filestore.check_root_hash(_id, file_size, bad_leaf_hashes) |
1396 | + self.assertEqual(cm.exception.id, _id) |
1397 | + self.assertEqual(cm.exception.file_size, file_size) |
1398 | + self.assertEqual(cm.exception.leaf_hashes, bad_leaf_hashes) |
1399 | + self.assertEqual(cm.exception.bad_id, |
1400 | + protocol.hash_root(file_size, bad_leaf_hashes) |
1401 | + ) |
1402 | + |
1403 | def test_enumerate_leaf_hashes(self): |
1404 | f = filestore.enumerate_leaf_hashes |
1405 | self.assertEqual( |
1406 | @@ -1011,28 +861,39 @@ |
1407 | |
1408 | def test_check_id(self): |
1409 | # Test with wrong type |
1410 | + bad = random_id(30).encode('utf-8') |
1411 | with self.assertRaises(TypeError) as cm: |
1412 | - filestore.check_id(42) |
1413 | + filestore.check_id(bad) |
1414 | self.assertEqual( |
1415 | str(cm.exception), |
1416 | - TYPE_ERROR.format('_id', str, int, 42) |
1417 | + TYPE_ERROR.format('_id', str, bytes, bad) |
1418 | ) |
1419 | |
1420 | - # Test with invalid base32 encoding: |
1421 | - bad = 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7N' |
1422 | - with self.assertRaises(filestore.IDError) as cm: |
1423 | - filestore.check_id(bad) |
1424 | - self.assertIs(cm.exception.id, bad) |
1425 | - |
1426 | # Test with wrong length: |
1427 | - bad = 'NWBNVXVK5DQGIOW7MYR4K3KA' |
1428 | - with self.assertRaises(filestore.IDError) as cm: |
1429 | - filestore.check_id(bad) |
1430 | - self.assertIs(cm.exception.id, bad) |
1431 | - |
1432 | - # Test with a good chash: |
1433 | - good = 'DHS36R6HGG52TQRB6BN7N66EWOPU4H67WISD3J6TWNCXXAZT' |
1434 | - self.assertIs(filestore.check_id(good), good) |
1435 | + for i in range(321): |
1436 | + value = 'N' * i |
1437 | + if len(value) in (48, 56): |
1438 | + self.assertIs(filestore.check_id(value), value) |
1439 | + continue |
1440 | + with self.assertRaises(filestore.IDError) as cm: |
1441 | + filestore.check_id(value) |
1442 | + self.assertIs(cm.exception.id, value) |
1443 | + |
1444 | + # Test with 48 and 56 character IDs:
1445 | + id48 = random_id(30) |
1446 | + self.assertIs(filestore.check_id(id48), id48) |
1447 | + id56 = random_id(35) |
1448 | + self.assertIs(filestore.check_id(id56), id56) |
1449 | + |
1450 | + # Test case sensitivity: |
1451 | + bad48 = id48.lower() |
1452 | + with self.assertRaises(filestore.IDError) as cm: |
1453 | + filestore.check_id(bad48) |
1454 | + self.assertIs(cm.exception.id, bad48) |
1455 | + bad56 = id56.lower() |
1456 | + with self.assertRaises(filestore.IDError) as cm: |
1457 | + filestore.check_id(bad56) |
1458 | + self.assertIs(cm.exception.id, bad56) |
1459 | |
1460 | def test_iter_files(self): |
1461 | """ |
1462 | @@ -1486,6 +1347,31 @@ |
1463 | src_fp = open(dst, 'rb') |
1464 | self.assertEqual(filestore.hash_fp(src_fp), ch) |
1465 | |
1466 | + def test_hash_fp2(self): |
1467 | + obj = misc.load_test_vectors() |
1468 | + for proto in PROTOCOLS: |
1469 | + tmp = TempDir() |
1470 | + leaves = misc.build_test_leaves(proto.leaf_size) |
1471 | + vectors = obj[str(proto.digest_b32len)] |
1472 | + for (key, value) in vectors['root_hashes'].items(): |
1473 | + name = tmp.join(key) |
1474 | + assert not path.exists(name) |
1475 | + fp = open(name, 'wb') |
1476 | + for L in key: |
1477 | + fp.write(leaves[L]) |
1478 | + fp.close() |
1479 | + fp = open(name, 'rb') |
1480 | + ch = filestore.hash_fp(fp, protocol=proto) |
1481 | + self.assertIsInstance(ch, filestore.ContentHash) |
1482 | + self.assertEqual(ch.id, value) |
1483 | + self.assertEqual(ch.file_size, path.getsize(name)) |
1484 | + self.assertEqual(ch.leaf_hashes, |
1485 | + b''.join( |
1486 | + misc.decode(vectors['leaf_hashes'][L][i]) |
1487 | + for (i, L) in enumerate(key) |
1488 | + ) |
1489 | + ) |
1490 | + |
1491 | def test_ensuredir(self): |
1492 | f = filestore.ensuredir |
1493 | tmp = TempDir() |
1494 | @@ -1747,7 +1633,7 @@ |
1495 | os.chmod(f, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) |
1496 | self.assertEqual(path.getsize(f), 7) |
1497 | self.assertEqual(list(fs), []) |
1498 | - long = tuple(random_id(35) for i in range(50)) |
1499 | + long = tuple(random_id(40) for i in range(50)) |
1500 | for _id in long: |
1501 | f = fs.join('files', _id[:2], _id[2:]) |
1502 | assert not path.exists(f) |
1503 | @@ -1788,10 +1674,26 @@ |
1504 | assert path.isdir(fs.path(_id)) |
1505 | self.assertEqual(list(fs), []) |
1506 | |
1507 | - # Now add valid files in |
1508 | + # Now add valid files in (48 character IDs) |
1509 | stats = [] |
1510 | for i in range(2000): |
1511 | - _id = random_id() |
1512 | + _id = random_id(30) |
1513 | + size = i + 1 |
1514 | + f = fs.path(_id) |
1515 | + assert not path.exists(f) |
1516 | + open(f, 'wb').write(b'N' * size) |
1517 | + os.chmod(f, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) |
1518 | + self.assertEqual(path.getsize(f), size) |
1519 | + st = filestore.Stat(_id, f, size, path.getmtime(f)) |
1520 | + self.assertEqual(fs.stat(_id), st) |
1521 | + stats.append(st) |
1522 | + self.assertNotEqual(list(fs), stats) # Sorting! |
1523 | + stats.sort(key=lambda s: s.id) |
1524 | + self.assertEqual(list(fs), stats) |
1525 | + |
1526 | + # Add more valid files in (56 character IDs) |
1527 | + for i in range(1000): |
1528 | + _id = random_id(35) |
1529 | size = i + 1 |
1530 | f = fs.path(_id) |
1531 | assert not path.exists(f) |
1532 | @@ -2186,6 +2088,31 @@ |
1533 | self.assertFalse(path.exists(canonical)) |
1534 | self.assertTrue(path.isfile(corrupt)) |
1535 | |
1536 | + def test_verify2(self): |
1537 | + tmp = TempDir() |
1538 | + fs = filestore.FileStore(tmp.dir) |
1539 | + obj = misc.load_test_vectors() |
1540 | + for proto in PROTOCOLS: |
1541 | + leaves = misc.build_test_leaves(proto.leaf_size) |
1542 | + vectors = obj[str(proto.digest_b32len)] |
1543 | + for (key, value) in vectors['root_hashes'].items(): |
1544 | + name = fs.path(value) |
1545 | + assert not path.exists(name) |
1546 | + fp = open(name, 'wb') |
1547 | + for L in key: |
1548 | + fp.write(leaves[L]) |
1549 | + fp.close() |
1550 | + ch = fs.verify(value) |
1551 | + self.assertIsInstance(ch, filestore.ContentHash) |
1552 | + self.assertEqual(ch.id, value) |
1553 | + self.assertEqual(ch.file_size, path.getsize(name)) |
1554 | + self.assertEqual(ch.leaf_hashes, |
1555 | + b''.join( |
1556 | + misc.decode(vectors['leaf_hashes'][L][i]) |
1557 | + for (i, L) in enumerate(key) |
1558 | + ) |
1559 | + ) |
1560 | + |
1561 | def test_verify_iter(self): |
1562 | tmp = TempDir() |
1563 | fs = filestore.FileStore(tmp.dir) |
1564 | @@ -2933,31 +2860,3 @@ |
1565 | fs2.verify(ID) |
1566 | fs.verify(ID) |
1567 | |
1568 | - |
1569 | -class TestTempFileStore(TestCase): |
1570 | - def test_init(self): |
1571 | - fs = filestore.TempFileStore() |
1572 | - self.assertIsInstance(fs, filestore.FileStore) |
1573 | - self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1574 | - self.assertIsNone(fs.id) |
1575 | - self.assertEqual(fs.copies, 0) |
1576 | - |
1577 | - fs = filestore.TempFileStore('foobar', 2) |
1578 | - self.assertIsInstance(fs, filestore.FileStore) |
1579 | - self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1580 | - self.assertEqual(fs.id, 'foobar') |
1581 | - self.assertEqual(fs.copies, 2) |
1582 | - |
1583 | - fs = filestore.TempFileStore(_id='hooray', copies=3) |
1584 | - self.assertIsInstance(fs, filestore.FileStore) |
1585 | - self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1586 | - self.assertEqual(fs.id, 'hooray') |
1587 | - self.assertEqual(fs.copies, 3) |
1588 | - |
1589 | - def test_del(self): |
1590 | - fs = filestore.TempFileStore() |
1591 | - parentdir = fs.parentdir |
1592 | - self.assertTrue(path.isdir(parentdir)) |
1593 | - fs = None |
1594 | - self.assertFalse(path.exists(parentdir)) |
1595 | - |
1596 | |
1597 | === added file 'filestore/tests/test_misc.py' |
1598 | --- filestore/tests/test_misc.py 1970-01-01 00:00:00 +0000 |
1599 | +++ filestore/tests/test_misc.py 2012-12-31 04:35:24 +0000 |
1600 | @@ -0,0 +1,150 @@ |
1601 | +# filestore: dmedia hashing protocol and file layout |
1602 | +# Copyright (C) 2012 Novacut Inc |
1603 | +# |
1604 | +# This file is part of `filestore`. |
1605 | +# |
1606 | +# `filestore` is free software: you can redistribute it and/or modify it under |
1607 | +# the terms of the GNU Affero General Public License as published by the Free |
1608 | +# Software Foundation, either version 3 of the License, or (at your option) any |
1609 | +# later version. |
1610 | +# |
1611 | +# `filestore` is distributed in the hope that it will be useful, but WITHOUT ANY |
1612 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
1613 | +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
1614 | +# details. |
1615 | +# |
1616 | +# You should have received a copy of the GNU Affero General Public License along |
1617 | +# with `filestore`. If not, see <http://www.gnu.org/licenses/>. |
1618 | +# |
1619 | +# Authors: |
1620 | +# Jason Gerard DeRose <jderose@novacut.com> |
1621 | + |
1622 | +""" |
1623 | +Unit tests for `filestore.misc` module. |
1624 | +""" |
1625 | + |
1626 | +from unittest import TestCase |
1627 | +import os |
1628 | +from os import path |
1629 | +import json |
1630 | +from base64 import b64encode |
1631 | + |
1632 | +import filestore |
1633 | +from filestore.protocols import MiB, Protocol, VERSION0, VERSION1 |
1634 | +from filestore import misc |
1635 | + |
1636 | + |
1637 | +class TestFunctions(TestCase): |
1638 | + def test_encode(self): |
1639 | + data = os.urandom(30) |
1640 | + self.assertEqual( |
1641 | + misc.encode(data), |
1642 | + b64encode(data).decode('utf-8') |
1643 | + ) |
1644 | + data = os.urandom(35) |
1645 | + self.assertEqual( |
1646 | + misc.encode(data), |
1647 | + b64encode(data).decode('utf-8') |
1648 | + ) |
1649 | + |
1650 | + def test_load_test_vectors(self): |
1651 | + pkg = path.dirname(path.abspath(filestore.__file__)) |
1652 | + self.assertTrue(path.isdir(pkg)) |
1653 | + self.assertEqual( |
1654 | + misc.TEST_VECTORS, |
1655 | + path.join(pkg, 'data', 'test-vectors.json') |
1656 | + ) |
1657 | + self.assertTrue(path.isfile(misc.TEST_VECTORS)) |
1658 | + self.assertEqual( |
1659 | + misc.load_test_vectors(), |
1660 | + json.loads(open(misc.TEST_VECTORS, 'r').read()) |
1661 | + ) |
1662 | + |
1663 | + def test_build_test_leaves(self): |
1664 | + obj = misc.build_test_leaves(MiB) |
1665 | + self.assertEqual(obj, |
1666 | + { |
1667 | + 'A': b'A', |
1668 | + 'B': b'B' * (MiB - 1), |
1669 | + 'C': b'C' * MiB, |
1670 | + } |
1671 | + ) |
1672 | + self.assertEqual(len(obj['A']), 1) |
1673 | + self.assertEqual(len(obj['B']), MiB - 1) |
1674 | + self.assertEqual(len(obj['C']), MiB) |
1675 | + |
1676 | + obj = misc.build_test_leaves(2 * MiB) |
1677 | + self.assertEqual(obj, |
1678 | + { |
1679 | + 'A': b'A', |
1680 | + 'B': b'B' * (2 * MiB - 1), |
1681 | + 'C': b'C' * (2 * MiB), |
1682 | + } |
1683 | + ) |
1684 | + self.assertEqual(len(obj['A']), 1) |
1685 | + self.assertEqual(len(obj['B']), 2 * MiB - 1) |
1686 | + self.assertEqual(len(obj['C']), 2 * MiB) |
1687 | + |
1688 | + def test_build_test_vectors(self): |
1689 | + proto = Protocol(MiB, 200) |
1690 | + leaves = misc.build_test_leaves(proto.leaf_size) |
1691 | + A0 = proto.hash_leaf(0, leaves['A']) |
1692 | + A1 = proto.hash_leaf(1, leaves['A']) |
1693 | + B0 = proto.hash_leaf(0, leaves['B']) |
1694 | + B1 = proto.hash_leaf(1, leaves['B']) |
1695 | + C0 = proto.hash_leaf(0, leaves['C']) |
1696 | + C1 = proto.hash_leaf(1, leaves['C']) |
1697 | + self.assertEqual( |
1698 | + misc.build_test_vectors(proto), |
1699 | + { |
1700 | + 'leaf_hashes': { |
1701 | + 'A': [misc.encode(A0), misc.encode(A1)], |
1702 | + 'B': [misc.encode(B0), misc.encode(B1)], |
1703 | + 'C': [misc.encode(C0), misc.encode(C1)], |
1704 | + }, |
1705 | + 'root_hashes': { |
1706 | + 'A': proto.hash_root(1, A0), |
1707 | + 'B': proto.hash_root(MiB - 1, B0), |
1708 | + 'C': proto.hash_root(MiB, C0), |
1709 | + 'CA': proto.hash_root(MiB + 1, C0 + A1), |
1710 | + 'CB': proto.hash_root(2 * MiB - 1, C0 + B1), |
1711 | + 'CC': proto.hash_root(2 * MiB, C0 + C1), |
1712 | + } |
1713 | + } |
1714 | + ) |
1715 | + |
1716 | + self.assertEqual( |
1717 | + misc.load_test_vectors(), |
1718 | + { |
1719 | + '48': misc.build_test_vectors(VERSION0), |
1720 | + '56': misc.build_test_vectors(VERSION1), |
1721 | + } |
1722 | + ) |
1723 | + |
1724 | + |
1725 | +class TestTempFileStore(TestCase): |
1726 | + def test_init(self): |
1727 | + fs = misc.TempFileStore() |
1728 | + self.assertIsInstance(fs, filestore.FileStore) |
1729 | + self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1730 | + self.assertIsNone(fs.id) |
1731 | + self.assertEqual(fs.copies, 0) |
1732 | + |
1733 | + fs = misc.TempFileStore('foobar', 2) |
1734 | + self.assertIsInstance(fs, filestore.FileStore) |
1735 | + self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1736 | + self.assertEqual(fs.id, 'foobar') |
1737 | + self.assertEqual(fs.copies, 2) |
1738 | + |
1739 | + fs = misc.TempFileStore(_id='hooray', copies=3) |
1740 | + self.assertIsInstance(fs, filestore.FileStore) |
1741 | + self.assertTrue(fs.parentdir.startswith('/tmp/TempFileStore.')) |
1742 | + self.assertEqual(fs.id, 'hooray') |
1743 | + self.assertEqual(fs.copies, 3) |
1744 | + |
1745 | + def test_del(self): |
1746 | + fs = misc.TempFileStore() |
1747 | + parentdir = fs.parentdir |
1748 | + self.assertTrue(path.isdir(parentdir)) |
1749 | + fs = None |
1750 | + self.assertFalse(path.exists(parentdir)) |
1751 | |
1752 | === added file 'filestore/tests/test_protocols.py' |
1753 | --- filestore/tests/test_protocols.py 1970-01-01 00:00:00 +0000 |
1754 | +++ filestore/tests/test_protocols.py 2012-12-31 04:35:24 +0000 |
1755 | @@ -0,0 +1,1223 @@ |
1756 | +# filestore: dmedia hashing protocol and file layout |
1757 | +# Copyright (C) 2012 Novacut Inc |
1758 | +# |
1759 | +# This file is part of `filestore`. |
1760 | +# |
1761 | +# `filestore` is free software: you can redistribute it and/or modify it under |
1762 | +# the terms of the GNU Affero General Public License as published by the Free |
1763 | +# Software Foundation, either version 3 of the License, or (at your option) any |
1764 | +# later version. |
1765 | +# |
1766 | +# `filestore` is distributed in the hope that it will be useful, but WITHOUT ANY |
1767 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
1768 | +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
1769 | +# details. |
1770 | +# |
1771 | +# You should have received a copy of the GNU Affero General Public License along |
1772 | +# with `filestore`. If not, see <http://www.gnu.org/licenses/>. |
1773 | +# |
1774 | +# Authors: |
1775 | +# Jason Gerard DeRose <jderose@novacut.com> |
1776 | + |
1777 | +""" |
1778 | +Unit tests for `filestore.protocols` module. |
1779 | +""" |
1780 | + |
1781 | +from unittest import TestCase |
1782 | +import os |
1783 | +from base64 import b32encode, b32decode |
1784 | + |
1785 | +from skein import skein512 |
1786 | + |
1787 | +from filestore.misc import decode |
1788 | +from filestore import protocols |
1789 | + |
1790 | + |
1791 | +MiB = 1024 * 1024 |
1792 | +EightMiB = 8 * MiB |
1793 | +COUNT = 50 * 1000 |
1794 | +TYPE_ERROR = '{}: need a {!r}; got a {!r}: {!r}' |
1795 | + |
1796 | +A = b'A' |
1797 | +B = b'B' * (EightMiB - 1) |
1798 | +C = b'C' * EightMiB |
1799 | + |
1800 | + |
1801 | +class ProtocolTestCase(TestCase): |
1802 | + def check_hash_leaf(self, proto): |
1803 | + |
1804 | + # Test with wrong leaf_index type: |
1805 | + with self.assertRaises(TypeError) as cm: |
1806 | + proto.hash_leaf(0.0, None) |
1807 | + self.assertEqual( |
1808 | + str(cm.exception), |
1809 | + TYPE_ERROR.format('leaf_index', int, float, 0.0) |
1810 | + ) |
1811 | + with self.assertRaises(TypeError) as cm: |
1812 | + proto.hash_leaf(1.5, None) |
1813 | + self.assertEqual( |
1814 | + str(cm.exception), |
1815 | + TYPE_ERROR.format('leaf_index', int, float, 1.5) |
1816 | + ) |
1817 | + |
1818 | + # Test with wrong leaf_index value |
1819 | + with self.assertRaises(ValueError) as cm: |
1820 | + proto.hash_leaf(-1, None) |
1821 | + self.assertEqual( |
1822 | + str(cm.exception), |
1823 | + 'leaf_index: must be >= 0; got -1' |
1824 | + ) |
1825 | + with self.assertRaises(ValueError) as cm: |
1826 | + proto.hash_leaf(-17, None) |
1827 | + self.assertEqual( |
1828 | + str(cm.exception), |
1829 | + 'leaf_index: must be >= 0; got -17' |
1830 | + ) |
1831 | + |
1832 | + # Test with wrong leaf_data type |
1833 | + leaf_data = 'a' * 31415 |
1834 | + with self.assertRaises(TypeError) as cm: |
1835 | + proto.hash_leaf(2, leaf_data) |
1836 | + self.assertEqual( |
1837 | + str(cm.exception), |
1838 | + TYPE_ERROR.format('leaf_data', bytes, str, leaf_data) |
1839 | + ) |
1840 | + |
1841 | + # Test with wrong leaf_data length |
1842 | + toosmall = b'' |
1843 | + with self.assertRaises(ValueError) as cm: |
1844 | + proto.hash_leaf(2, toosmall) |
1845 | + self.assertEqual( |
1846 | + str(cm.exception), |
1847 | + 'Need 1 <= len(leaf_data) <= {}; got 0'.format(proto.leaf_size) |
1848 | + ) |
1849 | + toobig = b'a' * (proto.leaf_size + 1) |
1850 | + with self.assertRaises(ValueError) as cm: |
1851 | + proto.hash_leaf(2, toobig) |
1852 | + self.assertEqual( |
1853 | + str(cm.exception), |
1854 | + 'Need 1 <= len(leaf_data) <= {}; got {}'.format( |
1855 | + proto.leaf_size, proto.leaf_size + 1 |
1856 | + ) |
1857 | + ) |
1858 | + |
1859 | + # Smallest possible leaf, index=0 |
1860 | + accum = set() |
1861 | + leaf_data = b'D' |
1862 | + digest = proto.hash_leaf(0, leaf_data) |
1863 | + accum.add(digest) |
1864 | + self.assertEqual(proto._hash_leaf(0, leaf_data, b''), digest) |
1865 | + |
1866 | + # Smallest possible leaf, index=1 |
1867 | + digest = proto.hash_leaf(1, leaf_data) |
1868 | + self.assertNotIn(digest, accum) |
1869 | + accum.add(digest) |
1870 | + self.assertEqual(proto._hash_leaf(1, leaf_data, b''), digest) |
1871 | + |
1872 | + # Largest possible leaf, index=0 |
1873 | + leaf_data = b'D' * proto.leaf_size |
1874 | + digest = proto.hash_leaf(0, leaf_data) |
1875 | + self.assertNotIn(digest, accum) |
1876 | + accum.add(digest) |
1877 | + self.assertEqual(proto._hash_leaf(0, leaf_data, b''), digest) |
1878 | + |
1879 | + # Largest possible leaf, index=1 |
1880 | + digest = proto.hash_leaf(1, leaf_data) |
1881 | + self.assertNotIn(digest, accum) |
1882 | + accum.add(digest) |
1883 | + self.assertEqual(proto._hash_leaf(1, leaf_data, b''), digest) |
1884 | + |
1885 | + # Test with challenge, smallest leaf, index=0: |
1886 | + challenge = os.urandom(16) |
1887 | + leaf_data = b'D' |
1888 | + digest = proto.hash_leaf(0, leaf_data, challenge) |
1889 | + self.assertNotIn(digest, accum) |
1890 | + accum.add(digest) |
1891 | + self.assertEqual(proto._hash_leaf(0, leaf_data, challenge), digest) |
1892 | + |
1893 | + # Test with challenge, smallest leaf, index=1: |
1894 | + digest = proto.hash_leaf(1, leaf_data, challenge) |
1895 | + self.assertNotIn(digest, accum) |
1896 | + accum.add(digest) |
1897 | + self.assertEqual(proto._hash_leaf(1, leaf_data, challenge), digest) |
1898 | + |
1899 | + # Test with challenge, largest leaf, index=0: |
1900 | + leaf_data = b'D' * proto.leaf_size |
1901 | + digest = proto.hash_leaf(0, leaf_data, challenge) |
1902 | + self.assertNotIn(digest, accum) |
1903 | + accum.add(digest) |
1904 | + self.assertEqual(proto._hash_leaf(0, leaf_data, challenge), digest) |
1905 | + |
1906 | + # Test with challenge, largest leaf, index=1: |
1907 | + digest = proto.hash_leaf(1, leaf_data, challenge) |
1908 | + self.assertNotIn(digest, accum) |
1909 | + accum.add(digest) |
1910 | + self.assertEqual(proto._hash_leaf(1, leaf_data, challenge), digest) |
1911 | + |
1912 | + # Make sure we didn't goof |
1913 | + self.assertEqual(len(accum), 8) |
1914 | + |
1915 | + def check_hash_root(self, proto): |
1916 | + # Test with wrong file_size type: |
1917 | + with self.assertRaises(TypeError) as cm: |
1918 | + proto.hash_root(1.0, None) |
1919 | + self.assertEqual( |
1920 | + str(cm.exception), |
1921 | + TYPE_ERROR.format('file_size', int, float, 1.0) |
1922 | + ) |
1923 | + with self.assertRaises(TypeError) as cm: |
1924 | + proto.hash_root(1.5, None) |
1925 | + self.assertEqual( |
1926 | + str(cm.exception), |
1927 | + TYPE_ERROR.format('file_size', int, float, 1.5) |
1928 | + ) |
1929 | + |
1930 | + # Test with bad file_size value |
1931 | + with self.assertRaises(ValueError) as cm: |
1932 | + proto.hash_root(0, None) |
1933 | + self.assertEqual( |
1934 | + str(cm.exception), |
1935 | + 'file_size: must be >= 1; got 0' |
1936 | + ) |
1937 | + with self.assertRaises(ValueError) as cm: |
1938 | + proto.hash_root(-1, None) |
1939 | + self.assertEqual( |
1940 | + str(cm.exception), |
1941 | + 'file_size: must be >= 1; got -1' |
1942 | + ) |
1943 | + |
1944 | + # Test with wrong leaf_hashes type: |
1945 | + leaf_hashes = 'A' * proto.digest_bytes |
1946 | + with self.assertRaises(TypeError) as cm: |
1947 | + proto.hash_root(17, leaf_hashes) |
1948 | + self.assertEqual( |
1949 | + str(cm.exception), |
1950 | + TYPE_ERROR.format('leaf_hashes', bytes, str, leaf_hashes) |
1951 | + ) |
1952 | + |
1953 | + # Test with empty leaf_hashes |
1954 | + with self.assertRaises(ValueError) as cm: |
1955 | + proto.hash_root(17, b'') |
1956 | + self.assertEqual(str(cm.exception), 'leaf_hashes cannot be empty') |
1957 | + |
1958 | + # Test when len(leaf_hashes) is not a multiple of Protocol.digest_bytes |
1959 | + with self.assertRaises(ValueError) as cm: |
1960 | + proto.hash_root(17, b'A' * (proto.digest_bytes + 1)) |
1961 | + self.assertEqual( |
1962 | + str(cm.exception), |
1963 | + 'len(leaf_hashes) is {}, not multiple of {}'.format( |
1964 | + proto.digest_bytes + 1, proto.digest_bytes |
1965 | + ) |
1966 | + ) |
1967 | + |
1968 | + # Test low and high bounds |
1969 | + with self.assertRaises(ValueError) as cm: |
1970 | + proto.hash_root(proto.leaf_size + 1, b'a' * proto.digest_bytes) |
1971 | + self.assertEqual( |
1972 | + str(cm.exception), |
1973 | + 'Need 1 <= file_size <= {}; got {}'.format( |
1974 | + proto.leaf_size, proto.leaf_size + 1 |
1975 | + ) |
1976 | + ) |
1977 | + with self.assertRaises(ValueError) as cm: |
1978 | + proto.hash_root(proto.leaf_size, b'ab' * proto.digest_bytes) |
1979 | + self.assertEqual( |
1980 | + str(cm.exception), |
1981 | + 'Need {} <= file_size <= {}; got {}'.format( |
1982 | + proto.leaf_size + 1, |
1983 | + 2 * proto.leaf_size, |
1984 | + proto.leaf_size |
1985 | + ) |
1986 | + ) |
1987 | + with self.assertRaises(ValueError) as cm: |
1988 | + proto.hash_root( |
1989 | + 2 * proto.leaf_size + 1, |
1990 | + b'ab' * proto.digest_bytes |
1991 | + ) |
1992 | + self.assertEqual( |
1993 | + str(cm.exception), |
1994 | + 'Need {} <= file_size <= {}; got {}'.format( |
1995 | + proto.leaf_size + 1, |
1996 | + 2 * proto.leaf_size, |
1997 | + 2 * proto.leaf_size + 1 |
1998 | + ) |
1999 | + ) |
2000 | + |
2001 | + # Test with good file_size and leaf_hashes |
2002 | + accum = set() |
2003 | + leaf_hashes = b'D' * proto.digest_bytes |
2004 | + _id = proto.hash_root(1, leaf_hashes) |
2005 | + self.assertNotIn(_id, accum) |
2006 | + accum.add(_id) |
2007 | + self.assertEqual( |
2008 | + proto._hash_root(1, leaf_hashes), |
2009 | + b32decode(_id.encode('utf-8')) |
2010 | + ) |
2011 | + |
2012 | + _id = proto.hash_root(2, leaf_hashes) |
2013 | + self.assertNotIn(_id, accum) |
2014 | + accum.add(_id) |
2015 | + self.assertEqual( |
2016 | + proto._hash_root(2, leaf_hashes), |
2017 | + b32decode(_id.encode('utf-8')) |
2018 | + ) |
2019 | + |
2020 | + leaf_hashes = b'AB' * proto.digest_bytes |
2021 | + _id = proto.hash_root(proto.leaf_size + 1, leaf_hashes) |
2022 | + self.assertNotIn(_id, accum) |
2023 | + accum.add(_id) |
2024 | + self.assertEqual( |
2025 | + proto._hash_root(proto.leaf_size + 1, leaf_hashes), |
2026 | + b32decode(_id.encode('utf-8')) |
2027 | + ) |
2028 | + |
2029 | + _id = proto.hash_root(proto.leaf_size + 2, leaf_hashes) |
2030 | + self.assertNotIn(_id, accum) |
2031 | + accum.add(_id) |
2032 | + self.assertEqual( |
2033 | + proto._hash_root(proto.leaf_size + 2, leaf_hashes), |
2034 | + b32decode(_id.encode('utf-8')) |
2035 | + ) |
2036 | + |
2037 | + # Make sure we didn't goof |
2038 | + self.assertEqual(len(accum), 4) |
2039 | + |
2040 | + def sanity_check_hash_leaf(self, proto): |
2041 | + """ |
2042 | + Random value sanity check on Protocol._hash_leaf(). |
2043 | + """ |
2044 | + |
2045 | + # Sanity check on our cryptographic claim that the
2046 | + # leaf-hash is tied to the leaf-index: |
2047 | + leaf_data = os.urandom(16) |
2048 | + accum = set( |
2049 | + proto._hash_leaf(i, leaf_data, b'') |
2050 | + for i in range(COUNT) |
2051 | + ) |
2052 | + self.assertEqual(len(accum), COUNT) |
2053 | + |
2054 | + # Sanity check on our cryptographic claim that the
2055 | + # leaf-hash is tied to the leaf-data: |
2056 | + accum = set( |
2057 | + proto._hash_leaf(21, os.urandom(16), b'') |
2058 | + for i in range(COUNT) |
2059 | + ) |
2060 | + self.assertEqual(len(accum), COUNT) |
2061 | + |
2062 | + # Sanity check on our cryptographic claim that the
2063 | + # leaf-hash is tied to the challenge: |
2064 | + accum = set( |
2065 | + proto._hash_leaf(21, leaf_data, os.urandom(16)) |
2066 | + for i in range(COUNT) |
2067 | + ) |
2068 | + self.assertEqual(len(accum), COUNT) |
2069 | + |
2070 | + def sanity_check_hash_root(self, proto): |
2071 | + """ |
2072 | + Random value sanity check on Protocol._hash_root(). |
2073 | + """ |
2074 | + |
2075 | + # Sanity check on our cryptographic claim that the
2076 | + # root-hash is tied to the file-size: |
2077 | + leaf_hashes = os.urandom(proto.digest_bytes) |
2078 | + accum = set( |
2079 | + proto._hash_root(size, leaf_hashes) |
2080 | + for size in range(1, COUNT + 1) |
2081 | + ) |
2082 | + self.assertEqual(len(accum), COUNT) |
2083 | + |
2084 | + # Sanity check on our cryptographic claim that the
2085 | + # root-hash is tied to the leaf-hashes: |
2086 | + accum = set( |
2087 | + proto._hash_root(314159, os.urandom(proto.digest_bytes)) |
2088 | + for i in range(COUNT) |
2089 | + ) |
2090 | + self.assertEqual(len(accum), COUNT) |
2091 | + |
2092 | + |
2093 | +class TestProtocol(ProtocolTestCase): |
2094 | + def test_init(self): |
2095 | + # Wrong `leaf_size` type: |
2096 | + with self.assertRaises(TypeError) as cm: |
2097 | + protocols.Protocol(1.0 * MiB, 200) |
2098 | + self.assertEqual( |
2099 | + str(cm.exception), |
2100 | + TYPE_ERROR.format('leaf_size', int, float, 1.0 * MiB) |
2101 | + ) |
2102 | + |
2103 | + # leaf_size < MiB: |
2104 | + with self.assertRaises(ValueError) as cm: |
2105 | + protocols.Protocol(MiB - 1, 200) |
2106 | + self.assertEqual( |
2107 | + str(cm.exception), |
2108 | + 'leaf_size: must be >= MiB; got {!r}'.format(MiB - 1) |
2109 | + ) |
2110 | + |
2111 | + # leaf_size % MiB != 0: |
2112 | + with self.assertRaises(ValueError) as cm: |
2113 | + protocols.Protocol(MiB * 3 // 2, 200) |
2114 | + self.assertEqual( |
2115 | + str(cm.exception), |
2116 | + 'leaf_size: must be multiple of MiB; got {!r}'.format(MiB * 3 // 2) |
2117 | + ) |
2118 | + |
2119 | + # Wrong `digest_bits` type: |
2120 | + with self.assertRaises(TypeError) as cm: |
2121 | + protocols.Protocol(MiB, 200.0) |
2122 | + self.assertEqual( |
2123 | + str(cm.exception), |
2124 | + TYPE_ERROR.format('digest_bits', int, float, 200.0) |
2125 | + ) |
2126 | + |
2127 | + # digest_bits < 200: |
2128 | + with self.assertRaises(ValueError) as cm: |
2129 | + protocols.Protocol(MiB, 159) |
2130 | + self.assertEqual( |
2131 | + str(cm.exception), |
2132 | + 'digest_bits: must be >= 200; got 159' |
2133 | + ) |
2134 | + |
2135 | + # digest_bits % 40 != 0: |
2136 | + with self.assertRaises(ValueError) as cm: |
2137 | + protocols.Protocol(MiB, 220) |
2138 | + self.assertEqual( |
2139 | + str(cm.exception), |
2140 | + 'digest_bits: must be multiple of 40; got 220' |
2141 | + ) |
2142 | + |
2143 | + # Test with the good minimum values: |
2144 | + proto = protocols.Protocol(MiB, 200) |
2145 | + self.assertEqual(proto.leaf_size, MiB) |
2146 | + self.assertEqual(proto.digest_bits, 200) |
2147 | + self.assertEqual(proto.digest_bytes, 25) |
2148 | + self.assertEqual(proto.digest_b32len, 40) |
2149 | + |
2150 | + # Test with the actual v0 protocol values: |
2151 | + proto = protocols.Protocol(8 * MiB, 240) |
2152 | + self.assertEqual(proto.leaf_size, 8 * MiB) |
2153 | + self.assertEqual(proto.digest_bits, 240) |
2154 | + self.assertEqual(proto.digest_bytes, 30) |
2155 | + self.assertEqual(proto.digest_b32len, 48) |
2156 | + |
2157 | + # Test with the actual v1 protocol values: |
2158 | + proto = protocols.Protocol(8 * MiB, 280) |
2159 | + self.assertEqual(proto.leaf_size, 8 * MiB) |
2160 | + self.assertEqual(proto.digest_bits, 280) |
2161 | + self.assertEqual(proto.digest_bytes, 35) |
2162 | + self.assertEqual(proto.digest_b32len, 56) |
2163 | + |
2164 | + def test_hash_leaf(self): |
2165 | + proto = protocols.Protocol(MiB, 200) |
2166 | + self.check_hash_leaf(proto) |
2167 | + self.sanity_check_hash_leaf(proto) |
2168 | + |
2169 | + # Smallest possible leaf, index=0 |
2170 | + accum = set() |
2171 | + leaf_data = b'D' |
2172 | + digest = proto.hash_leaf(0, leaf_data) |
2173 | + accum.add(digest) |
2174 | + self.assertEqual( |
2175 | + digest, |
2176 | + skein512(leaf_data, |
2177 | + digest_bits=200, |
2178 | + pers=protocols.PERS_LEAF, |
2179 | + key=b'0', |
2180 | + ).digest() |
2181 | + ) |
2182 | + self.assertEqual(proto._hash_leaf(0, leaf_data, b''), digest) |
2183 | + |
2184 | + # Smallest possible leaf, index=1 |
2185 | + digest = proto.hash_leaf(1, leaf_data) |
2186 | + self.assertNotIn(digest, accum) |
2187 | + accum.add(digest) |
2188 | + self.assertEqual( |
2189 | + digest, |
2190 | + skein512(leaf_data, |
2191 | + digest_bits=200, |
2192 | + pers=protocols.PERS_LEAF, |
2193 | + key=b'1', |
2194 | + ).digest() |
2195 | + ) |
2196 | + self.assertEqual(proto._hash_leaf(1, leaf_data, b''), digest) |
2197 | + |
2198 | + # Largest possible leaf, index=0 |
2199 | + leaf_data = b'D' * MiB |
2200 | + digest = proto.hash_leaf(0, leaf_data) |
2201 | + self.assertNotIn(digest, accum) |
2202 | + accum.add(digest) |
2203 | + self.assertEqual( |
2204 | + digest, |
2205 | + skein512(leaf_data, |
2206 | + digest_bits=200, |
2207 | + pers=protocols.PERS_LEAF, |
2208 | + key=b'0', |
2209 | + ).digest() |
2210 | + ) |
2211 | + self.assertEqual(proto._hash_leaf(0, leaf_data, b''), digest) |
2212 | + |
2213 | + # Largest possible leaf, index=1 |
2214 | + digest = proto.hash_leaf(1, leaf_data) |
2215 | + self.assertNotIn(digest, accum) |
2216 | + accum.add(digest) |
2217 | + self.assertEqual( |
2218 | + digest, |
2219 | + skein512(leaf_data, |
2220 | + digest_bits=200, |
2221 | + pers=protocols.PERS_LEAF, |
2222 | + key=b'1', |
2223 | + ).digest() |
2224 | + ) |
2225 | + self.assertEqual(proto._hash_leaf(1, leaf_data, b''), digest) |
2226 | + |
2227 | + # Test with challenge, smallest leaf, index=0: |
2228 | + challenge = os.urandom(16) |
2229 | + leaf_data = b'D' |
2230 | + digest = proto.hash_leaf(0, leaf_data, challenge) |
2231 | + self.assertNotIn(digest, accum) |
2232 | + accum.add(digest) |
2233 | + self.assertEqual( |
2234 | + digest, |
2235 | + skein512(leaf_data, |
2236 | + digest_bits=200, |
2237 | + pers=protocols.PERS_LEAF, |
2238 | + key=b'0', |
2239 | + nonce=challenge, |
2240 | + ).digest() |
2241 | + ) |
2242 | + self.assertEqual(proto._hash_leaf(0, leaf_data, challenge), digest) |
2243 | + |
2244 | + # Test with challenge, smallest leaf, index=1: |
2245 | + digest = proto.hash_leaf(1, leaf_data, challenge) |
2246 | + self.assertNotIn(digest, accum) |
2247 | + accum.add(digest) |
2248 | + self.assertEqual( |
2249 | + digest, |
2250 | + skein512(leaf_data, |
2251 | + digest_bits=200, |
2252 | + pers=protocols.PERS_LEAF, |
2253 | + key=b'1', |
2254 | + nonce=challenge, |
2255 | + ).digest() |
2256 | + ) |
2257 | + self.assertEqual(proto._hash_leaf(1, leaf_data, challenge), digest) |
2258 | + |
2259 | + # Test with challenge, largest leaf, index=0: |
2260 | + leaf_data = b'D' * MiB |
2261 | + digest = proto.hash_leaf(0, leaf_data, challenge) |
2262 | + self.assertNotIn(digest, accum) |
2263 | + accum.add(digest) |
2264 | + self.assertEqual( |
2265 | + digest, |
2266 | + skein512(leaf_data, |
2267 | + digest_bits=200, |
2268 | + pers=protocols.PERS_LEAF, |
2269 | + key=b'0', |
2270 | + nonce=challenge, |
2271 | + ).digest() |
2272 | + ) |
2273 | + self.assertEqual(proto._hash_leaf(0, leaf_data, challenge), digest) |
2274 | + |
2275 | + # Test with challenge, largest leaf, index=1: |
2276 | + digest = proto.hash_leaf(1, leaf_data, challenge) |
2277 | + self.assertNotIn(digest, accum) |
2278 | + accum.add(digest) |
2279 | + self.assertEqual( |
2280 | + digest, |
2281 | + skein512(leaf_data, |
2282 | + digest_bits=200, |
2283 | + pers=protocols.PERS_LEAF, |
2284 | + key=b'1', |
2285 | + nonce=challenge, |
2286 | + ).digest() |
2287 | + ) |
2288 | + self.assertEqual(proto._hash_leaf(1, leaf_data, challenge), digest) |
2289 | + |
2290 | + # Make sure we didn't goof |
2291 | + self.assertEqual(len(accum), 8) |
2292 | + |
2293 | + def test_hash_root(self): |
2294 | + proto = protocols.Protocol(MiB, 200) |
2295 | + self.check_hash_root(proto) |
2296 | + self.sanity_check_hash_root(proto) |
2297 | + |
2298 | + accum = set() |
2299 | + |
2300 | + # Smallest file-size possible for one leaf: |
2301 | + leaf_hashes = b'D' * 25 |
2302 | + digest = proto._hash_root(1, leaf_hashes) |
2303 | + self.assertNotIn(digest, accum) |
2304 | + accum.add(digest) |
2305 | + self.assertEqual( |
2306 | + digest, |
2307 | + skein512(leaf_hashes, |
2308 | + digest_bits=200, |
2309 | + pers=protocols.PERS_ROOT, |
2310 | + key=b'1', |
2311 | + ).digest() |
2312 | + ) |
2313 | + self.assertEqual( |
2314 | + proto.hash_root(1, leaf_hashes), |
2315 | + b32encode(digest).decode('utf-8') |
2316 | + ) |
2317 | + |
2318 | + # Largest file-size possible for one leaf: |
2319 | + digest = proto._hash_root(MiB, leaf_hashes) |
2320 | + self.assertNotIn(digest, accum) |
2321 | + accum.add(digest) |
2322 | + self.assertEqual( |
2323 | + digest, |
2324 | + skein512(leaf_hashes, |
2325 | + digest_bits=200, |
2326 | + pers=protocols.PERS_ROOT, |
2327 | + key=b'1048576', |
2328 | + ).digest() |
2329 | + ) |
2330 | + self.assertEqual( |
2331 | + proto.hash_root(MiB, leaf_hashes), |
2332 | + b32encode(digest).decode('utf-8') |
2333 | + ) |
2334 | + |
2335 | + # Smallest file-size possible for two leaves: |
2336 | + leaf_hashes = b'D' * 50 |
2337 | + digest = proto._hash_root(MiB + 1, leaf_hashes) |
2338 | + self.assertNotIn(digest, accum) |
2339 | + accum.add(digest) |
2340 | + self.assertEqual( |
2341 | + digest, |
2342 | + skein512(leaf_hashes, |
2343 | + digest_bits=200, |
2344 | + pers=protocols.PERS_ROOT, |
2345 | + key=b'1048577', |
2346 | + ).digest() |
2347 | + ) |
2348 | + self.assertEqual( |
2349 | + proto.hash_root(MiB + 1, leaf_hashes), |
2350 | + b32encode(digest).decode('utf-8') |
2351 | + ) |
2352 | + |
2353 | + # Largest file-size possible for two leaves: |
2354 | + digest = proto._hash_root(2 * MiB, leaf_hashes) |
2355 | + self.assertNotIn(digest, accum) |
2356 | + accum.add(digest) |
2357 | + self.assertEqual( |
2358 | + digest, |
2359 | + skein512(leaf_hashes, |
2360 | + digest_bits=200, |
2361 | + pers=protocols.PERS_ROOT, |
2362 | + key=b'2097152', |
2363 | + ).digest() |
2364 | + ) |
2365 | + self.assertEqual( |
2366 | + proto.hash_root(2 * MiB, leaf_hashes), |
2367 | + b32encode(digest).decode('utf-8') |
2368 | + ) |
2369 | + |
2370 | + # Make sure we didn't goof |
2371 | + self.assertEqual(len(accum), 4) |
2372 | + |
2373 | + |
2374 | +class TestVersionOne(ProtocolTestCase): |
2375 | + |
2376 | + def test_hash_leaf(self): |
2377 | + proto = protocols.VERSION1 |
2378 | + |
2379 | + self.check_hash_leaf(proto) |
2380 | + self.sanity_check_hash_leaf(proto) |
2381 | + |
2382 | + # A0 |
2383 | + digest = proto._hash_leaf(0, A, b'') |
2384 | + self.assertEqual(digest, |
2385 | + decode('vnqPKTOknItURAuVQi5gHml2ncDDv/apR7CRbZ8Gw57L9S0=') |
2386 | + ) |
2387 | + self.assertEqual( |
2388 | + digest, |
2389 | + skein512(A, |
2390 | + digest_bits=280, |
2391 | + pers=protocols.PERS_LEAF, |
2392 | + key=b'0', |
2393 | + ).digest() |
2394 | + ) |
2395 | + self.assertEqual(proto.hash_leaf(0, A), digest) |
2396 | + |
2397 | + # A1 |
2398 | + digest = proto._hash_leaf(1, A, b'') |
2399 | + self.assertEqual(digest, |
2400 | + decode('mQX/95lrNeZNnsQKj02VnNXIqgRjwGnqLoZoF9EHkmmhjjs=') |
2401 | + ) |
2402 | + self.assertEqual( |
2403 | + digest, |
2404 | + skein512(A, |
2405 | + digest_bits=280, |
2406 | + pers=protocols.PERS_LEAF, |
2407 | + key=b'1', |
2408 | + ).digest() |
2409 | + ) |
2410 | + self.assertEqual(proto.hash_leaf(1, A), digest) |
2411 | + |
2412 | + # B0 |
2413 | + digest = proto._hash_leaf(0, B, b'') |
2414 | + self.assertEqual(digest, |
2415 | + decode('f776qpuQhQO1EWR1nrUCIxD9+sIqTcBb2AgJ6iOK8Gq8bIg=') |
2416 | + ) |
2417 | + self.assertEqual( |
2418 | + digest, |
2419 | + skein512(B, |
2420 | + digest_bits=280, |
2421 | + pers=protocols.PERS_LEAF, |
2422 | + key=b'0', |
2423 | + ).digest() |
2424 | + ) |
2425 | + self.assertEqual(proto.hash_leaf(0, B), digest) |
2426 | + |
2427 | + # B1 |
2428 | + digest = proto._hash_leaf(1, B, b'') |
2429 | + self.assertEqual(digest, |
2430 | + decode('ygruy07GH5BRvrwtO2TCGCdxGjpOG/ogF+8sytiTYihTfsw=') |
2431 | + ) |
2432 | + self.assertEqual( |
2433 | + digest, |
2434 | + skein512(B, |
2435 | + digest_bits=280, |
2436 | + pers=protocols.PERS_LEAF, |
2437 | + key=b'1', |
2438 | + ).digest() |
2439 | + ) |
2440 | + self.assertEqual(proto.hash_leaf(1, B), digest) |
2441 | + |
2442 | + # C0 |
2443 | + digest = proto._hash_leaf(0, C, b'') |
2444 | + self.assertEqual(digest, |
2445 | + decode('jbRklQZ8C9suPd0Ar/5vO2JVMoHEQ62ksuG1EsYtPbzr8FE=') |
2446 | + ) |
2447 | + self.assertEqual( |
2448 | + digest, |
2449 | + skein512(C, |
2450 | + digest_bits=280, |
2451 | + pers=protocols.PERS_LEAF, |
2452 | + key=b'0', |
2453 | + ).digest() |
2454 | + ) |
2455 | + self.assertEqual(proto.hash_leaf(0, C), digest) |
2456 | + |
2457 | + # C1 |
2458 | + digest = proto._hash_leaf(1, C, b'') |
2459 | + self.assertEqual(digest, |
2460 | + decode('uGq34DS/oD0NT0zxWl562rYAUBbqeRZDvNtg0YDujBPzBGQ=') |
2461 | + ) |
2462 | + self.assertEqual( |
2463 | + digest, |
2464 | + skein512(C, |
2465 | + digest_bits=280, |
2466 | + pers=protocols.PERS_LEAF, |
2467 | + key=b'1', |
2468 | + ).digest() |
2469 | + ) |
2470 | + self.assertEqual(proto.hash_leaf(1, C), digest) |
2471 | + |
2472 | + def test_hash_root(self): |
2473 | + proto = protocols.VERSION1 |
2474 | + |
2475 | + self.check_hash_root(proto) |
2476 | + self.sanity_check_hash_root(proto) |
2477 | + |
2478 | + A0 = decode('vnqPKTOknItURAuVQi5gHml2ncDDv/apR7CRbZ8Gw57L9S0=') |
2479 | + A1 = decode('mQX/95lrNeZNnsQKj02VnNXIqgRjwGnqLoZoF9EHkmmhjjs=') |
2480 | + B0 = decode('f776qpuQhQO1EWR1nrUCIxD9+sIqTcBb2AgJ6iOK8Gq8bIg=') |
2481 | + B1 = decode('ygruy07GH5BRvrwtO2TCGCdxGjpOG/ogF+8sytiTYihTfsw=') |
2482 | + C0 = decode('jbRklQZ8C9suPd0Ar/5vO2JVMoHEQ62ksuG1EsYtPbzr8FE=') |
2483 | + C1 = decode('uGq34DS/oD0NT0zxWl562rYAUBbqeRZDvNtg0YDujBPzBGQ=') |
2484 | + |
2485 | + # A |
2486 | + digest = proto._hash_root(1, A0) |
2487 | + self.assertEqual( |
2488 | + b32encode(digest).decode('utf-8'), |
2489 | + 'FWV6OJYI36C5NN5DC4GS2IGWZXFCZCGJGHK35YV62LKAG7D2Z4LO4Z2S' |
2490 | + ) |
2491 | + self.assertEqual( |
2492 | + digest, |
2493 | + skein512(A0, |
2494 | + digest_bits=280, |
2495 | + pers=protocols.PERS_ROOT, |
2496 | + key=b'1', |
2497 | + ).digest() |
2498 | + ) |
2499 | + self.assertEqual( |
2500 | + proto.hash_root(1, A0), |
2501 | + b32encode(digest).decode('utf-8') |
2502 | + ) |
2503 | + |
2504 | + # B |
2505 | + digest = proto._hash_root(8388607, B0) |
2506 | + self.assertEqual( |
2507 | + b32encode(digest).decode('utf-8'), |
2508 | + 'OB756PX5V32JMKJAFKIAJ4AFSFPA2WLNIK32ELNO4FJLJPEEEN6DCAAJ' |
2509 | + ) |
2510 | + self.assertEqual( |
2511 | + digest, |
2512 | + skein512(B0, |
2513 | + digest_bits=280, |
2514 | + pers=protocols.PERS_ROOT, |
2515 | + key=b'8388607', |
2516 | + ).digest() |
2517 | + ) |
2518 | + self.assertEqual( |
2519 | + proto.hash_root(8388607, B0), |
2520 | + b32encode(digest).decode('utf-8') |
2521 | + ) |
2522 | + |
2523 | + # C |
2524 | + digest = proto._hash_root(8388608, C0) |
2525 | + self.assertEqual( |
2526 | + b32encode(digest).decode('utf-8'), |
2527 | + 'QSOHXCDH64IQBOG2NM67XEC6MLZKKPGBTISWWRPMCFCJ2EKMA2SMLY46' |
2528 | + ) |
2529 | + self.assertEqual( |
2530 | + digest, |
2531 | + skein512(C0, |
2532 | + digest_bits=280, |
2533 | + pers=protocols.PERS_ROOT, |
2534 | + key=b'8388608', |
2535 | + ).digest() |
2536 | + ) |
2537 | + self.assertEqual( |
2538 | + proto.hash_root(8388608, C0), |
2539 | + b32encode(digest).decode('utf-8') |
2540 | + ) |
2541 | + |
2542 | + # CA |
2543 | + digest = proto._hash_root(8388609, C0 + A1) |
2544 | + self.assertEqual( |
2545 | + b32encode(digest).decode('utf-8'), |
2546 | + 'BQ5UTB33ML2VDTCTLVXK6N4VSMGGKKKDYKG24B6DOAFJB6NRSGMB5BNO' |
2547 | + ) |
2548 | + self.assertEqual( |
2549 | + digest, |
2550 | + skein512(C0 + A1, |
2551 | + digest_bits=280, |
2552 | + pers=protocols.PERS_ROOT, |
2553 | + key=b'8388609', |
2554 | + ).digest() |
2555 | + ) |
2556 | + self.assertEqual( |
2557 | + proto.hash_root(8388609, C0 + A1), |
2558 | + b32encode(digest).decode('utf-8') |
2559 | + ) |
2560 | + |
2561 | + # CB |
2562 | + digest = proto._hash_root(16777215, C0 + B1) |
2563 | + self.assertEqual( |
2564 | + b32encode(digest).decode('utf-8'), |
2565 | + 'ER3LDDZ2LHMTDLOPE5XA5GEEZ6OE45VFIFLY42GEMV4TSZ2B7GJJXAIX' |
2566 | + ) |
2567 | + self.assertEqual( |
2568 | + digest, |
2569 | + skein512(C0 + B1, |
2570 | + digest_bits=280, |
2571 | + pers=protocols.PERS_ROOT, |
2572 | + key=b'16777215', |
2573 | + ).digest() |
2574 | + ) |
2575 | + self.assertEqual( |
2576 | + proto.hash_root(16777215, C0 + B1), |
2577 | + b32encode(digest).decode('utf-8') |
2578 | + ) |
2579 | + |
2580 | + # CC |
2581 | + digest = proto._hash_root(16777216, C0 + C1) |
2582 | + self.assertEqual( |
2583 | + b32encode(digest).decode('utf-8'), |
2584 | + 'R6RN5KL7UBNJWR5SK5YPUKIGAOWWFMYYOVESU5DPT34X5MEK75PXXYIX' |
2585 | + ) |
2586 | + self.assertEqual( |
2587 | + digest, |
2588 | + skein512(C0 + C1, |
2589 | + digest_bits=280, |
2590 | + pers=protocols.PERS_ROOT, |
2591 | + key=b'16777216', |
2592 | + ).digest() |
2593 | + ) |
2594 | + self.assertEqual( |
2595 | + proto.hash_root(16777216, C0 + C1), |
2596 | + b32encode(digest).decode('utf-8') |
2597 | + ) |
2598 | + |
2599 | + |
2600 | +class TestOldProtocol(TestCase): |
2601 | + def test_hash_leaf_index(self): |
2602 | + proto = protocols.OldProtocol(MiB, 200) |
2603 | + accum = set() |
2604 | + key = proto._hash_leaf_index(0) |
2605 | + accum.add(key) |
2606 | + self.assertEqual(key, |
2607 | + skein512(b'0', |
2608 | + digest_bits=200, |
2609 | + pers=protocols.PERS_LEAF_INDEX, |
2610 | + ).digest() |
2611 | + ) |
2612 | + |
2613 | + key = proto._hash_leaf_index(17) |
2614 | + self.assertNotIn(key, accum) |
2615 | + accum.add(key) |
2616 | + self.assertEqual(key, |
2617 | + skein512(b'17', |
2618 | + digest_bits=200, |
2619 | + pers=protocols.PERS_LEAF_INDEX, |
2620 | + ).digest() |
2621 | + ) |
2622 | + self.assertEqual(len(accum), 2) |
2623 | + |
2624 | + count = 25 * 1000 |
2625 | + accum = set( |
2626 | + proto._hash_leaf_index(i) for i in range(count) |
2627 | + ) |
2628 | + self.assertEqual(len(accum), count) |
2629 | + |
2630 | + ############################################# |
2631 | + # Again, this time with different digest_bits |
2632 | + proto = protocols.OldProtocol(MiB, 240) |
2633 | + accum = set() |
2634 | + key = proto._hash_leaf_index(0) |
2635 | + accum.add(key) |
2636 | + self.assertEqual(key, |
2637 | + skein512(b'0', |
2638 | + digest_bits=240, |
2639 | + pers=protocols.PERS_LEAF_INDEX, |
2640 | + ).digest() |
2641 | + ) |
2642 | + |
2643 | + key = proto._hash_leaf_index(17) |
2644 | + self.assertNotIn(key, accum) |
2645 | + accum.add(key) |
2646 | + self.assertEqual(key, |
2647 | + skein512(b'17', |
2648 | + digest_bits=240, |
2649 | + pers=protocols.PERS_LEAF_INDEX, |
2650 | + ).digest() |
2651 | + ) |
2652 | + self.assertEqual(len(accum), 2) |
2653 | + |
2654 | + count = 25 * 1000 |
2655 | + accum = set( |
2656 | + proto._hash_leaf_index(i) for i in range(count) |
2657 | + ) |
2658 | + self.assertEqual(len(accum), count) |
2659 | + |
2660 | + def test_hash_leaf(self): |
2661 | + challenge = b'secret foo bar' |
2662 | + proto = protocols.OldProtocol(MiB, 200) |
2663 | + key0 = proto._hash_leaf_index(0) |
2664 | + key17 = proto._hash_leaf_index(17) |
2665 | + |
2666 | + accum = set() |
2667 | + |
2668 | + # Min leaf size, index=0 |
2669 | + leaf_data = b'D' |
2670 | + digest = proto._hash_leaf(0, leaf_data, b'') |
2671 | + self.assertNotIn(digest, accum) |
2672 | + accum.add(digest) |
2673 | + self.assertEqual(digest, |
2674 | + skein512(key0 + leaf_data, |
2675 | + digest_bits=200, |
2676 | + pers=protocols.PERS_LEAF, |
2677 | + ).digest() |
2678 | + ) |
2679 | + |
2680 | + # Min leaf size, index=17 |
2681 | + digest = proto._hash_leaf(17, leaf_data, b'') |
2682 | + self.assertNotIn(digest, accum) |
2683 | + accum.add(digest) |
2684 | + self.assertEqual(digest, |
2685 | + skein512(key17 + leaf_data, |
2686 | + digest_bits=200, |
2687 | + pers=protocols.PERS_LEAF, |
2688 | + ).digest() |
2689 | + ) |
2690 | + |
2691 | + # With challenge, min leaf size, index=0 |
2692 | + digest = proto._hash_leaf(0, leaf_data, challenge) |
2693 | + self.assertNotIn(digest, accum) |
2694 | + accum.add(digest) |
2695 | + self.assertEqual(digest, |
2696 | + skein512(key0 + leaf_data, |
2697 | + digest_bits=200, |
2698 | + pers=protocols.PERS_LEAF, |
2699 | + nonce=challenge, |
2700 | + ).digest() |
2701 | + ) |
2702 | + |
2703 | + # With challenge, min leaf size, index=17 |
2704 | + digest = proto._hash_leaf(17, leaf_data, challenge) |
2705 | + self.assertNotIn(digest, accum) |
2706 | + accum.add(digest) |
2707 | + self.assertEqual(digest, |
2708 | + skein512(key17 + leaf_data, |
2709 | + digest_bits=200, |
2710 | + pers=protocols.PERS_LEAF, |
2711 | + nonce=challenge, |
2712 | + ).digest() |
2713 | + ) |
2714 | + |
2715 | + # Max leaf size, index=0 |
2716 | + leaf_data = b'D' * MiB |
2717 | + digest = proto._hash_leaf(0, leaf_data, b'') |
2718 | + self.assertNotIn(digest, accum) |
2719 | + accum.add(digest) |
2720 | + self.assertEqual(digest, |
2721 | + skein512(key0 + leaf_data, |
2722 | + digest_bits=200, |
2723 | + pers=protocols.PERS_LEAF, |
2724 | + ).digest() |
2725 | + ) |
2726 | + |
2727 | + # Max leaf size, index=17 |
2728 | + digest = proto._hash_leaf(17, leaf_data, b'') |
2729 | + self.assertNotIn(digest, accum) |
2730 | + accum.add(digest) |
2731 | + self.assertEqual(digest, |
2732 | + skein512(key17 + leaf_data, |
2733 | + digest_bits=200, |
2734 | + pers=protocols.PERS_LEAF, |
2735 | + ).digest() |
2736 | + ) |
2737 | + |
2738 | + # With challenge, max leaf size, index=0 |
2739 | + digest = proto._hash_leaf(0, leaf_data, challenge) |
2740 | + self.assertNotIn(digest, accum) |
2741 | + accum.add(digest) |
2742 | + self.assertEqual(digest, |
2743 | + skein512(key0 + leaf_data, |
2744 | + digest_bits=200, |
2745 | + pers=protocols.PERS_LEAF, |
2746 | + nonce=challenge, |
2747 | + ).digest() |
2748 | + ) |
2749 | + |
2750 | + # With challenge, max leaf size, index=17 |
2751 | + digest = proto._hash_leaf(17, leaf_data, challenge) |
2752 | + self.assertNotIn(digest, accum) |
2753 | + accum.add(digest) |
2754 | + self.assertEqual(digest, |
2755 | + skein512(key17 + leaf_data, |
2756 | + digest_bits=200, |
2757 | + pers=protocols.PERS_LEAF, |
2758 | + nonce=challenge, |
2759 | + ).digest() |
2760 | + ) |
2761 | + |
2762 | + # Make sure we didn't goof: |
2763 | + self.assertEqual(len(accum), 8) |
2764 | + |
2765 | + # A 25k value sanity check on our cryptographic claim that the
2766 | + # leaf-hash is tied to the leaf-index: |
2767 | + count = 25 * 1000 |
2768 | + leaf_data = os.urandom(32) |
2769 | + accum = set( |
2770 | + proto._hash_leaf(i, leaf_data, b'') |
2771 | + for i in range(count) |
2772 | + ) |
2773 | + self.assertEqual(len(accum), count) |
2774 | + |
2775 | + # A 25k random value sanity check on our cryptographic claim that the
2776 | + # leaf-hash is tied to the leaf-data: |
2777 | + accum = set( |
2778 | + proto._hash_leaf(21, os.urandom(32), b'') |
2779 | + for i in range(count) |
2780 | + ) |
2781 | + self.assertEqual(len(accum), count) |
2782 | + |
2783 | + # A 25k random value sanity check on our cryptographic claim that the
2784 | + # leaf-hash is tied to the challenge: |
2785 | + accum = set( |
2786 | + proto._hash_leaf(21, leaf_data, os.urandom(16)) |
2787 | + for i in range(count) |
2788 | + ) |
2789 | + self.assertEqual(len(accum), count) |
2790 | + |
2791 | + def test_hash_file_size(self): |
2792 | + proto = protocols.OldProtocol(MiB, 200) |
2793 | + accum = set() |
2794 | + |
2795 | + key = proto._hash_file_size(1) |
2796 | + self.assertNotIn(key, accum) |
2797 | + accum.add(key) |
2798 | + self.assertEqual(key, |
2799 | + skein512(b'1', |
2800 | + digest_bits=200, |
2801 | + pers=protocols.PERS_FILE_SIZE, |
2802 | + ).digest() |
2803 | + ) |
2804 | + |
2805 | + key = proto._hash_file_size(18) |
2806 | + self.assertNotIn(key, accum) |
2807 | + accum.add(key) |
2808 | + self.assertEqual(key, |
2809 | + skein512(b'18', |
2810 | + digest_bits=200, |
2811 | + pers=protocols.PERS_FILE_SIZE, |
2812 | + ).digest() |
2813 | + ) |
2814 | + self.assertEqual(len(accum), 2) |
2815 | + |
2816 | + count = 25 * 1000 |
2817 | + accum = set( |
2818 | + proto._hash_file_size(size) for |
2819 | + size in range(1, count + 1) |
2820 | + ) |
2821 | + self.assertEqual(len(accum), count) |
2822 | + |
2823 | + ############################################# |
2824 | + # Again, this time with different digest_bits |
2825 | + proto = protocols.OldProtocol(MiB, 240) |
2826 | + accum = set() |
2827 | + |
2828 | + key = proto._hash_file_size(1) |
2829 | + self.assertNotIn(key, accum) |
2830 | + accum.add(key) |
2831 | + self.assertEqual(key, |
2832 | + skein512(b'1', |
2833 | + digest_bits=240, |
2834 | + pers=protocols.PERS_FILE_SIZE, |
2835 | + ).digest() |
2836 | + ) |
2837 | + |
2838 | + key = proto._hash_file_size(18) |
2839 | + self.assertNotIn(key, accum) |
2840 | + accum.add(key) |
2841 | + self.assertEqual(key, |
2842 | + skein512(b'18', |
2843 | + digest_bits=240, |
2844 | + pers=protocols.PERS_FILE_SIZE, |
2845 | + ).digest() |
2846 | + ) |
2847 | + self.assertEqual(len(accum), 2) |
2848 | + |
2849 | + count = 25 * 1000 |
2850 | + accum = set( |
2851 | + proto._hash_file_size(size) for |
2852 | + size in range(1, count + 1) |
2853 | + ) |
2854 | + self.assertEqual(len(accum), count) |
2855 | + |
2856 | + def test_hash_root(self): |
2857 | + proto = protocols.OldProtocol(MiB, 200) |
2858 | + key1 = proto._hash_file_size(1) |
2859 | + key18 = proto._hash_file_size(18) |
2860 | + keyM = proto._hash_file_size(MiB) |
2861 | + keyM1 = proto._hash_file_size(MiB + 1) |
2862 | + keyM18 = proto._hash_file_size(MiB + 18) |
2863 | + key2M = proto._hash_file_size(2 * MiB) |
2864 | + |
2865 | + accum = set() |
2866 | + |
2867 | + # One leaf, file_size=1 |
2868 | + leaf_hashes = b'D' * 25 |
2869 | + digest = proto._hash_root(1, leaf_hashes) |
2870 | + self.assertNotIn(digest, accum) |
2871 | + accum.add(digest) |
2872 | + self.assertEqual(digest, |
2873 | + skein512(key1 + leaf_hashes, |
2874 | + digest_bits=200, |
2875 | + pers=protocols.PERS_ROOT, |
2876 | + ).digest() |
2877 | + ) |
2878 | + self.assertEqual( |
2879 | + proto.hash_root(1, leaf_hashes), |
2880 | + b32encode(digest).decode('utf-8') |
2881 | + ) |
2882 | + |
2883 | + # One leaf, file_size=18 |
2884 | + digest = proto._hash_root(18, leaf_hashes) |
2885 | + self.assertNotIn(digest, accum) |
2886 | + accum.add(digest) |
2887 | + self.assertEqual(digest, |
2888 | + skein512(key18 + leaf_hashes, |
2889 | + digest_bits=200, |
2890 | + pers=protocols.PERS_ROOT, |
2891 | + ).digest() |
2892 | + ) |
2893 | + self.assertEqual( |
2894 | + proto.hash_root(18, leaf_hashes), |
2895 | + b32encode(digest).decode('utf-8') |
2896 | + ) |
2897 | + |
2898 | + # One leaf, file_size=MiB |
2899 | + digest = proto._hash_root(MiB, leaf_hashes) |
2900 | + self.assertNotIn(digest, accum) |
2901 | + accum.add(digest) |
2902 | + self.assertEqual(digest, |
2903 | + skein512(keyM + leaf_hashes, |
2904 | + digest_bits=200, |
2905 | + pers=protocols.PERS_ROOT, |
2906 | + ).digest() |
2907 | + ) |
2908 | + self.assertEqual( |
2909 | + proto.hash_root(MiB, leaf_hashes), |
2910 | + b32encode(digest).decode('utf-8') |
2911 | + ) |
2912 | + |
2913 | + # Two leaves, file_size=(MiB + 1) |
2914 | + leaf_hashes = b'D' * 50 |
2915 | + digest = proto._hash_root(MiB + 1, leaf_hashes) |
2916 | + self.assertNotIn(digest, accum) |
2917 | + accum.add(digest) |
2918 | + self.assertEqual(digest, |
2919 | + skein512(keyM1 + leaf_hashes, |
2920 | + digest_bits=200, |
2921 | + pers=protocols.PERS_ROOT, |
2922 | + ).digest() |
2923 | + ) |
2924 | + self.assertEqual( |
2925 | + proto.hash_root(MiB + 1, leaf_hashes), |
2926 | + b32encode(digest).decode('utf-8') |
2927 | + ) |
2928 | + |
2929 | + # Two leaves, file_size=(MiB + 18) |
2930 | + digest = proto._hash_root(MiB + 18, leaf_hashes) |
2931 | + self.assertNotIn(digest, accum) |
2932 | + accum.add(digest) |
2933 | + self.assertEqual(digest, |
2934 | + skein512(keyM18 + leaf_hashes, |
2935 | + digest_bits=200, |
2936 | + pers=protocols.PERS_ROOT, |
2937 | + ).digest() |
2938 | + ) |
2939 | + self.assertEqual( |
2940 | + proto.hash_root(MiB + 18, leaf_hashes), |
2941 | + b32encode(digest).decode('utf-8') |
2942 | + ) |
2943 | + |
2944 | + # Two leaves, file_size=(2 * MiB) |
2945 | + digest = proto._hash_root(2 * MiB, leaf_hashes) |
2946 | + self.assertNotIn(digest, accum) |
2947 | + accum.add(digest) |
2948 | + self.assertEqual(digest, |
2949 | + skein512(key2M + leaf_hashes, |
2950 | + digest_bits=200, |
2951 | + pers=protocols.PERS_ROOT, |
2952 | + ).digest() |
2953 | + ) |
2954 | + self.assertEqual( |
2955 | + proto.hash_root(2 * MiB, leaf_hashes), |
2956 | + b32encode(digest).decode('utf-8') |
2957 | + ) |
2958 | + |
2959 | + # Make sure we didn't goof: |
2960 | + self.assertEqual(len(accum), 6) |
2961 | + |
2962 | + # A 25k value sanity check on our cryptographic claim that the
2963 | + # root-hash is tied to the file-size: |
2964 | + count = 25 * 1000 |
2965 | + leaf_hashes = b'D' * 25 |
2966 | + accum = set( |
2967 | + proto._hash_root(size, leaf_hashes) |
2968 | + for size in range(1, count + 1) |
2969 | + ) |
2970 | + self.assertEqual(len(accum), count) |
2971 | + |
2972 | + # A 25k random value sanity check on our cryptographic claim that the
2973 | + # root-hash is tied to the leaf-hashes: |
2974 | + accum = set( |
2975 | + proto._hash_root(21, os.urandom(25)) |
2976 | + for i in range(count) |
2977 | + ) |
2978 | + self.assertEqual(len(accum), count) |
2979 | |
2980 | === modified file 'setup.py' |
2981 | --- setup.py 2012-11-01 06:15:22 +0000 |
2982 | +++ setup.py 2012-12-31 04:35:24 +0000 |
2983 | @@ -49,7 +49,14 @@ |
2984 | pass |
2985 | |
2986 | def run(self): |
2987 | - pynames = ['filestore', 'test_filestore'] |
2988 | + pynames = [ |
2989 | + 'filestore', |
2990 | + 'filestore.protocols', |
2991 | + 'filestore.misc', |
2992 | + 'filestore.tests', |
2993 | + 'filestore.tests.test_protocols', |
2994 | + 'filestore.tests.test_misc' |
2995 | + ] |
2996 | |
2997 | # Add unit-tests: |
2998 | loader = TestLoader() |
2999 | @@ -77,7 +84,8 @@ |
3000 | author='Jason Gerard DeRose', |
3001 | author_email='jderose@novacut.com', |
3002 | license='AGPLv3+', |
3003 | - py_modules=['filestore'], |
3004 | + packages=['filestore', 'filestore.tests'], |
3005 | + package_data={'filestore': ['data/test-vectors.json']}, |
3006 | scripts=['dmediasum'], |
3007 | cmdclass={'test': Test}, |
3008 | ext_modules=[_filestore], |
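
For anyone who wants to verify the v1 test vectors by hand, here's a minimal stand-alone sketch (assuming pyskein's skein512 and the PERS_LEAF/PERS_ROOT personalization strings from filestore.protocols) that reproduces the smallest 'A' vector exercised in TestVersionOne above; the base64 digest and base32 ID are copied verbatim from those tests:

    from base64 import b64decode, b32encode
    from skein import skein512
    from filestore import protocols

    # v1 leaf hash: the key is the leaf index as decimal bytes
    A0 = skein512(b'A',
        digest_bits=280,
        pers=protocols.PERS_LEAF,
        key=b'0',
    ).digest()
    assert A0 == b64decode('vnqPKTOknItURAuVQi5gHml2ncDDv/apR7CRbZ8Gw57L9S0=')

    # v1 root hash: the key is the file size as decimal bytes, and the
    # ID is the base32 encoding of the digest
    root = skein512(A0,
        digest_bits=280,
        pers=protocols.PERS_ROOT,
        key=b'1',
    ).digest()
    assert b32encode(root).decode('utf-8') == (
        'FWV6OJYI36C5NN5DC4GS2IGWZXFCZCGJGHK35YV62LKAG7D2Z4LO4Z2S'
    )

(The old-style construction is different: as TestOldProtocol shows, it hashes the leaf index and file size separately, each with its own personalization string, and feeds those digests in as a prefix of the message rather than as the skein key.)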
Oh, and for further details, see this bug:
https://bugs.launchpad.net/filestore/+bug/1094801
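
And while this isn't necessarily how the lookup is structured internally, the length-based protocol selection that test_check_id exercises (48-character IDs for v0, 56 for v1) boils down to something like this hypothetical helper:

    from filestore.protocols import VERSION0, VERSION1

    def guess_protocol(_id):
        # Hypothetical dispatch: match the base32 ID length against each
        # protocol's digest_b32len (48 for v0, 56 for v1)
        for proto in (VERSION0, VERSION1):
            if len(_id) == proto.digest_b32len:
                return proto
        raise ValueError('bad ID length: {}'.format(len(_id)))

    assert guess_protocol('N' * 48) is VERSION0
    assert guess_protocol('N' * 56) is VERSION1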