Merge lp:~jderose/filestore/helpers into lp:filestore
- helpers
- Merge into trunk
Status: | Merged |
---|---|
Merged at revision: | 190 |
Proposed branch: | lp:~jderose/filestore/helpers |
Merge into: | lp:filestore |
Diff against target: |
453 lines (+251/-50) 2 files modified
filestore.py (+95/-42) test_filestore.py (+156/-8) |
To merge this branch: | bzr merge lp:~jderose/filestore/helpers |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
David Jordan | Approve | ||
Review via email: mp+74932@code.launchpad.net |
Commit message
Description of the change
This merge:
* Adds 3 new highish-level FileStore methods:
FileStore.content_md5()
FileStore.verify_and_move()
FileStore.hash_and_move()
* Makes the use of the `Leaf` namedtuple consistent. Experience has shown the code is simpler and more robust if you keep track of the position in the file (leaf_index) a given chunk of leaf_data corresponds to. As of several revisions ago, when a leaf is read by reader() or batch_reader(), the index and data are bundled together right there in a Leaf(index, data) namedtuple. But this merge fixes some inconsistencies higher up:
- Consumers of `Leaf` no longer unpack it into (i, data)
- Consumers of `Leaf` yield/return the exact `Leaf` rather than (i, data)
- Hasher.update() has been renamed to the clearer Hasher.hash_leaf()
- Hasher.hash_leaf() now takes a `Leaf` instead of `bytes`
- Hasher is now more robust as it makes sure the Leaf.index from the readers matches up with its expected leaf_index
Preview Diff
1 | === modified file 'filestore.py' | |||
2 | --- filestore.py 2011-09-11 17:13:58 +0000 | |||
3 | +++ filestore.py 2011-09-12 03:19:24 +0000 | |||
4 | @@ -298,7 +298,8 @@ | |||
5 | 298 | import tempfile | 298 | import tempfile |
6 | 299 | import stat | 299 | import stat |
7 | 300 | from subprocess import check_call, CalledProcessError | 300 | from subprocess import check_call, CalledProcessError |
9 | 301 | from base64 import b32encode, b32decode | 301 | from base64 import b32encode, b32decode, b64encode |
10 | 302 | import hashlib | ||
11 | 302 | from threading import Thread | 303 | from threading import Thread |
12 | 303 | from queue import Queue | 304 | from queue import Queue |
13 | 304 | from collections import namedtuple | 305 | from collections import namedtuple |
14 | @@ -724,25 +725,26 @@ | |||
15 | 724 | self.array = bytearray() | 725 | self.array = bytearray() |
16 | 725 | self.closed = False | 726 | self.closed = False |
17 | 726 | 727 | ||
23 | 727 | def update(self, leaf): | 728 | def hash_leaf(self, leaf): |
24 | 728 | assert isinstance(leaf, bytes) | 729 | if not isinstance(leaf, Leaf): |
25 | 729 | assert 1 <= len(leaf) <= LEAF_SIZE | 730 | raise TypeError(TYPE_ERROR.format('leaf', Leaf, type(leaf), leaf)) |
26 | 730 | assert not self.closed | 731 | if self.closed: |
27 | 731 | if len(leaf) < LEAF_SIZE: | 732 | raise Exception('Cannot call Hasher.hash_leaf() when Hasher.closed') |
28 | 733 | if leaf.index != self.leaf_index: | ||
29 | 734 | raise Exception('Expected leaf.index {}, got {}'.format( | ||
30 | 735 | self.leaf_index, leaf.index) | ||
31 | 736 | ) | ||
32 | 737 | |||
33 | 738 | if len(leaf.data) < LEAF_SIZE: | ||
34 | 732 | self.closed = True | 739 | self.closed = True |
37 | 733 | 740 | leaf_hash = hash_leaf(leaf.index, leaf.data) | |
36 | 734 | leaf_hash = hash_leaf(self.leaf_index, leaf) | ||
38 | 735 | self.array.extend(leaf_hash) | 741 | self.array.extend(leaf_hash) |
45 | 736 | self.file_size += len(leaf) | 742 | self.file_size += len(leaf.data) |
40 | 737 | |||
41 | 738 | low = self.leaf_index * LEAF_SIZE + 1 | ||
42 | 739 | high = (self.leaf_index + 1) * LEAF_SIZE | ||
43 | 740 | assert low <= self.file_size <= high | ||
44 | 741 | |||
46 | 742 | self.leaf_index += 1 | 743 | self.leaf_index += 1 |
47 | 743 | return leaf_hash | 744 | return leaf_hash |
48 | 744 | 745 | ||
49 | 745 | def content_hash(self): | 746 | def content_hash(self): |
50 | 747 | self.closed = True | ||
51 | 746 | leaf_hashes = bytes(self.array) | 748 | leaf_hashes = bytes(self.array) |
52 | 747 | return ContentHash( | 749 | return ContentHash( |
53 | 748 | hash_root(self.file_size, leaf_hashes), | 750 | hash_root(self.file_size, leaf_hashes), |
54 | @@ -824,14 +826,14 @@ | |||
55 | 824 | assert isinstance(src_fp, (io.BufferedReader, io.BufferedRandom)) | 826 | assert isinstance(src_fp, (io.BufferedReader, io.BufferedRandom)) |
56 | 825 | assert src_fp.mode in ('rb', 'rb+') | 827 | assert src_fp.mode in ('rb', 'rb+') |
57 | 826 | src_fp.seek(0) | 828 | src_fp.seek(0) |
59 | 827 | i = 0 | 829 | index = 0 |
60 | 828 | while True: | 830 | while True: |
61 | 829 | data = src_fp.read(LEAF_SIZE) | 831 | data = src_fp.read(LEAF_SIZE) |
62 | 830 | if not data: | 832 | if not data: |
63 | 831 | queue.put(None) | 833 | queue.put(None) |
64 | 832 | break | 834 | break |
67 | 833 | queue.put(Leaf(i, data)) | 835 | queue.put(Leaf(index, data)) |
68 | 834 | i += 1 | 836 | index += 1 |
69 | 835 | 837 | ||
70 | 836 | 838 | ||
71 | 837 | def batch_reader(batch, queue): | 839 | def batch_reader(batch, queue): |
72 | @@ -890,8 +892,7 @@ | |||
73 | 890 | """ | 892 | """ |
74 | 891 | Iterate through leaves in *src_fp*, reading in a separate thread. | 893 | Iterate through leaves in *src_fp*, reading in a separate thread. |
75 | 892 | 894 | ||
78 | 893 | The function yields a ``(leaf_index, leaf_data)`` tuple for each leaf in | 895 | The function yields a `Leaf` namedtuple for each leaf in *src_fp*. |
77 | 894 | *src_fp*. | ||
79 | 895 | 896 | ||
80 | 896 | Many operations in the `FileStore` involve a loop where an 8 MiB leaf is | 897 | Many operations in the `FileStore` involve a loop where an 8 MiB leaf is |
81 | 897 | read, then hashed, then possibly written to disk. As the hashing | 898 | read, then hashed, then possibly written to disk. As the hashing |
82 | @@ -904,8 +905,8 @@ | |||
83 | 904 | >>> def example(): | 905 | >>> def example(): |
84 | 905 | ... h = Hasher() | 906 | ... h = Hasher() |
85 | 906 | ... src_fp = open('/my/file', 'rb') | 907 | ... src_fp = open('/my/file', 'rb') |
88 | 907 | ... for (leaf_index, leaf) in reader_iter(src_fp): | 908 | ... for leaf in reader_iter(src_fp): |
89 | 908 | ... h.update(leaf) | 909 | ... h.hash_leaf(leaf) |
90 | 909 | ... | 910 | ... |
91 | 910 | 911 | ||
92 | 911 | The thread target is the `reader()` function. | 912 | The thread target is the `reader()` function. |
93 | @@ -987,7 +988,7 @@ | |||
94 | 987 | leaf = q.get() | 988 | leaf = q.get() |
95 | 988 | if leaf is EndFile: | 989 | if leaf is EndFile: |
96 | 989 | break | 990 | break |
98 | 990 | h.update(leaf.data) | 991 | h.hash_leaf(leaf) |
99 | 991 | for tmp_fp in temps: | 992 | for tmp_fp in temps: |
100 | 992 | tmp_fp.write(leaf.data) | 993 | tmp_fp.write(leaf.data) |
101 | 993 | ch = h.content_hash() | 994 | ch = h.content_hash() |
102 | @@ -1013,10 +1014,10 @@ | |||
103 | 1013 | :param dst_fp: optional file opened in ``'wb'`` mode | 1014 | :param dst_fp: optional file opened in ``'wb'`` mode |
104 | 1014 | """ | 1015 | """ |
105 | 1015 | hasher = Hasher() | 1016 | hasher = Hasher() |
108 | 1016 | for (i, leaf) in reader_iter(src_fp): | 1017 | for leaf in reader_iter(src_fp): |
109 | 1017 | hasher.update(leaf) | 1018 | hasher.hash_leaf(leaf) |
110 | 1018 | if dst_fp: | 1019 | if dst_fp: |
112 | 1019 | dst_fp.write(leaf) | 1020 | dst_fp.write(leaf.data) |
113 | 1020 | return hasher.content_hash() | 1021 | return hasher.content_hash() |
114 | 1021 | 1022 | ||
115 | 1022 | 1023 | ||
116 | @@ -1296,37 +1297,61 @@ | |||
117 | 1296 | if size != file_size: | 1297 | if size != file_size: |
118 | 1297 | corrupt = self.move_to_corrupt(src_fp, _id) | 1298 | corrupt = self.move_to_corrupt(src_fp, _id) |
119 | 1298 | raise SizeIntegrityError(self.parent, _id, file_size, size) | 1299 | raise SizeIntegrityError(self.parent, _id, file_size, size) |
123 | 1299 | for (i, leaf) in reader_iter(src_fp): | 1300 | for leaf in reader_iter(src_fp): |
124 | 1300 | got = hash_leaf(i, leaf) | 1301 | got = hash_leaf(leaf.index, leaf.data) |
125 | 1301 | expected = get_leaf_hash(leaf_hashes, i) | 1302 | expected = get_leaf_hash(leaf_hashes, leaf.index) |
126 | 1302 | if got != expected: | 1303 | if got != expected: |
127 | 1303 | self.move_to_corrupt(src_fp, _id) | 1304 | self.move_to_corrupt(src_fp, _id) |
131 | 1304 | raise LeafIntegrityError(self.parent, _id, i, expected, got) | 1305 | raise LeafIntegrityError( |
132 | 1305 | yield (i, leaf) | 1306 | self.parent, _id, leaf.index, expected, got |
133 | 1306 | assert get_leaf_hash(leaf_hashes, i + 1) == b'' | 1307 | ) |
134 | 1308 | yield leaf | ||
135 | 1309 | assert get_leaf_hash(leaf_hashes, leaf.index + 1) == b'' | ||
136 | 1307 | 1310 | ||
137 | 1308 | def verify_iter2(self, _id): | 1311 | def verify_iter2(self, _id): |
138 | 1309 | """ | 1312 | """ |
139 | 1310 | Yield each leaf as it's read, verifying file integrity after final leaf. | 1313 | Yield each leaf as it's read, verifying file integrity after final leaf. |
140 | 1311 | 1314 | ||
145 | 1312 | This method will yield ``(leaf_index, leaf_data)`` tuple for each leaf | 1315 | This method will yield a `Leaf` namedtuple for each leaf in the file |
146 | 1313 | in the file identified by *_id*. Use this method with care as the | 1316 | identified by *_id*. Use this method with care as the integrity of the |
147 | 1314 | integrity of the leaves is not known till after the last leaf has been | 1317 | leaves is not known till after the last leaf has been yielded. |
144 | 1315 | yielded. | ||
148 | 1316 | 1318 | ||
149 | 1317 | This method is similar to `FileStore.verify_iter()` except only the | 1319 | This method is similar to `FileStore.verify_iter()` except only the |
150 | 1318 | *_id* is needed. | 1320 | *_id* is needed. |
151 | 1319 | """ | 1321 | """ |
152 | 1320 | src_fp = self.open(_id) | 1322 | src_fp = self.open(_id) |
153 | 1321 | h = Hasher() | 1323 | h = Hasher() |
157 | 1322 | for (i, leaf) in reader_iter(src_fp): | 1324 | for leaf in reader_iter(src_fp): |
158 | 1323 | h.update(leaf) | 1325 | h.hash_leaf(leaf) |
159 | 1324 | yield (i, leaf) | 1326 | yield leaf |
160 | 1325 | c = h.content_hash() | 1327 | c = h.content_hash() |
161 | 1326 | if c.id != _id: | 1328 | if c.id != _id: |
162 | 1327 | self.move_to_corrupt(src_fp, _id) | 1329 | self.move_to_corrupt(src_fp, _id) |
163 | 1328 | raise FileIntegrityError(self.parent, _id, c.id) | 1330 | raise FileIntegrityError(self.parent, _id, c.id) |
164 | 1329 | 1331 | ||
165 | 1332 | def content_md5(self, _id): | ||
166 | 1333 | """ | ||
167 | 1334 | Compute md5 hash of the file with *_id* for use in Content-MD5 header. | ||
168 | 1335 | |||
169 | 1336 | For example: | ||
170 | 1337 | |||
171 | 1338 | >>> fs = FileStore('/home/jderose') #doctest: +SKIP | ||
172 | 1339 | >>> fs.content_md5('YDDL5ROVABZP4NBSJPC3HUQDVDAGAP5L26YFXD3UR6N5OLVN') #doctest: +SKIP | ||
173 | 1340 | ('99ca2a74521ad7825768bbfe7fe0dc49', 'mcoqdFIa14JXaLv+f+DcSQ==') | ||
174 | 1341 | |||
175 | 1342 | This method guarantees the correct md5 hash is computed for the file | ||
176 | 1343 | with *_id* because it verifies the file as it computes the md5 hash. | ||
177 | 1344 | Note that you do not have this guarantee if you simply opened the file | ||
178 | 1345 | with `FileStore.open()` and computed the md5 hash that way. | ||
179 | 1346 | |||
180 | 1347 | If you need the md5 hash for use with boto when uploading to S3, use | ||
181 | 1348 | this method rather than letting boto compute the md5 hash itself. | ||
182 | 1349 | """ | ||
183 | 1350 | md5 = hashlib.md5() | ||
184 | 1351 | for leaf in self.verify_iter2(_id): | ||
185 | 1352 | md5.update(leaf.data) | ||
186 | 1353 | return (md5.hexdigest(), b64encode(md5.digest()).decode('utf-8')) | ||
187 | 1354 | |||
188 | 1330 | def remove(self, _id): | 1355 | def remove(self, _id): |
189 | 1331 | """ | 1356 | """ |
190 | 1332 | Delete file with *_id* from underlying filesystem. | 1357 | Delete file with *_id* from underlying filesystem. |
191 | @@ -1445,7 +1470,7 @@ | |||
192 | 1445 | """ | 1470 | """ |
193 | 1446 | Move a file from its canonical location to its corrupt location. | 1471 | Move a file from its canonical location to its corrupt location. |
194 | 1447 | 1472 | ||
196 | 1448 | While a file is found to be corrupt (meaning the computed content hash | 1473 | When a file is found to be corrupt (meaning the computed content hash |
197 | 1449 | doesn't match the expected content hash), it is moved from its canonical | 1474 | doesn't match the expected content hash), it is moved from its canonical |
198 | 1450 | location to a special corrupt location. | 1475 | location to a special corrupt location. |
199 | 1451 | 1476 | ||
200 | @@ -1476,6 +1501,34 @@ | |||
201 | 1476 | src_fp.close() | 1501 | src_fp.close() |
202 | 1477 | return dst | 1502 | return dst |
203 | 1478 | 1503 | ||
204 | 1504 | def verify_and_move(self, tmp_fp, _id): | ||
205 | 1505 | allowed = (io.BufferedReader, io.BufferedRandom) | ||
206 | 1506 | if not isinstance(tmp_fp, allowed): | ||
207 | 1507 | raise TypeError( | ||
208 | 1508 | TYPE_ERROR.format('tmp_fp', allowed, type(tmp_fp), tmp_fp) | ||
209 | 1509 | ) | ||
210 | 1510 | if tmp_fp.name != self.partial_path(_id): | ||
211 | 1511 | raise ValueError( | ||
212 | 1512 | 'bad partial_path() for {!r}: {!r}'.format(_id, tmp_fp.name) | ||
213 | 1513 | ) | ||
214 | 1514 | ch = hash_fp(tmp_fp) | ||
215 | 1515 | if ch.id != _id: | ||
216 | 1516 | raise ValueError( | ||
217 | 1517 | 'expected {!r}, computed {!r}'.format(_id, ch.id) | ||
218 | 1518 | ) | ||
219 | 1519 | self.move_to_canonical(tmp_fp, _id) | ||
220 | 1520 | return ch | ||
221 | 1521 | |||
222 | 1522 | def hash_and_move(self, tmp_fp): | ||
223 | 1523 | allowed = (io.BufferedReader, io.BufferedRandom) | ||
224 | 1524 | if not isinstance(tmp_fp, allowed): | ||
225 | 1525 | raise TypeError( | ||
226 | 1526 | TYPE_ERROR.format('tmp_fp', allowed, type(tmp_fp), tmp_fp) | ||
227 | 1527 | ) | ||
228 | 1528 | ch = hash_fp(tmp_fp) | ||
229 | 1529 | self.move_to_canonical(tmp_fp, ch.id) | ||
230 | 1530 | return ch | ||
231 | 1531 | |||
232 | 1479 | def import_file(self, src_fp): | 1532 | def import_file(self, src_fp): |
233 | 1480 | """ | 1533 | """ |
234 | 1481 | Atomically copy open file *src_fp* into this filestore. | 1534 | Atomically copy open file *src_fp* into this filestore. |
235 | @@ -1524,10 +1577,10 @@ | |||
236 | 1524 | size = os.fstat(src_fp.fileno()).st_size | 1577 | size = os.fstat(src_fp.fileno()).st_size |
237 | 1525 | temps = [fs.allocate_tmp(size) for fs in filestores] | 1578 | temps = [fs.allocate_tmp(size) for fs in filestores] |
238 | 1526 | h = Hasher() | 1579 | h = Hasher() |
241 | 1527 | for (i, leaf) in reader_iter(src_fp): | 1580 | for leaf in reader_iter(src_fp): |
242 | 1528 | h.update(leaf) | 1581 | h.hash_leaf(leaf) |
243 | 1529 | for tmp_fp in temps: | 1582 | for tmp_fp in temps: |
245 | 1530 | tmp_fp.write(leaf) | 1583 | tmp_fp.write(leaf.data) |
246 | 1531 | c = h.content_hash() | 1584 | c = h.content_hash() |
247 | 1532 | if c.id != _id: | 1585 | if c.id != _id: |
248 | 1533 | self.move_to_corrupt(src_fp, _id) | 1586 | self.move_to_corrupt(src_fp, _id) |
249 | 1534 | 1587 | ||
250 | === modified file 'test_filestore.py' | |||
251 | --- test_filestore.py 2011-09-11 09:13:31 +0000 | |||
252 | +++ test_filestore.py 2011-09-12 03:19:24 +0000 | |||
253 | @@ -1052,30 +1052,76 @@ | |||
254 | 1052 | self.assertEqual(h.array, b'') | 1052 | self.assertEqual(h.array, b'') |
255 | 1053 | self.assertFalse(h.closed) | 1053 | self.assertFalse(h.closed) |
256 | 1054 | 1054 | ||
261 | 1055 | def test_update(self): | 1055 | def test_hash_leaf(self): |
262 | 1056 | h = filestore.Hasher() | 1056 | # Test with bad leaf type |
263 | 1057 | 1057 | h = filestore.Hasher() | |
264 | 1058 | self.assertEqual(h.update(LEAVES[0]), LEAF_HASHES[0:30]) | 1058 | with self.assertRaises(TypeError) as cm: |
265 | 1059 | h.hash_leaf(b'nope') | ||
266 | 1060 | self.assertEqual( | ||
267 | 1061 | str(cm.exception), | ||
268 | 1062 | TYPE_ERROR.format('leaf', filestore.Leaf, bytes, b'nope') | ||
269 | 1063 | ) | ||
270 | 1064 | |||
271 | 1065 | # We'll use these below | ||
272 | 1066 | leaf0 = filestore.Leaf(0, LEAVES[0]) | ||
273 | 1067 | leaf1 = filestore.Leaf(1, LEAVES[1]) | ||
274 | 1068 | leaf2 = filestore.Leaf(2, LEAVES[2]) | ||
275 | 1069 | |||
276 | 1070 | # Test when closed | ||
277 | 1071 | h = filestore.Hasher() | ||
278 | 1072 | h.closed = True | ||
279 | 1073 | with self.assertRaises(Exception) as cm: | ||
280 | 1074 | h.hash_leaf(leaf0) | ||
281 | 1075 | self.assertEqual( | ||
282 | 1076 | str(cm.exception), | ||
283 | 1077 | 'Cannot call Hasher.hash_leaf() when Hasher.closed' | ||
284 | 1078 | ) | ||
285 | 1079 | |||
286 | 1080 | # Test when leaf_index is wrong | ||
287 | 1081 | h = filestore.Hasher() | ||
288 | 1082 | h.leaf_index = 1 | ||
289 | 1083 | with self.assertRaises(Exception) as cm: | ||
290 | 1084 | h.hash_leaf(leaf0) | ||
291 | 1085 | self.assertEqual( | ||
292 | 1086 | str(cm.exception), | ||
293 | 1087 | 'Expected leaf.index 1, got 0' | ||
294 | 1088 | ) | ||
295 | 1089 | |||
296 | 1090 | # Test when it's all good | ||
297 | 1091 | h = filestore.Hasher() | ||
298 | 1092 | |||
299 | 1093 | self.assertEqual(h.hash_leaf(leaf0), LEAF_HASHES[0:30]) | ||
300 | 1059 | self.assertEqual(h.leaf_index, 1) | 1094 | self.assertEqual(h.leaf_index, 1) |
301 | 1060 | self.assertEqual(h.file_size, filestore.LEAF_SIZE) | 1095 | self.assertEqual(h.file_size, filestore.LEAF_SIZE) |
302 | 1061 | self.assertFalse(h.closed) | 1096 | self.assertFalse(h.closed) |
303 | 1062 | 1097 | ||
305 | 1063 | self.assertEqual(h.update(LEAVES[1]), LEAF_HASHES[30:60]) | 1098 | self.assertEqual(h.hash_leaf(leaf1), LEAF_HASHES[30:60]) |
306 | 1064 | self.assertEqual(h.leaf_index, 2) | 1099 | self.assertEqual(h.leaf_index, 2) |
307 | 1065 | self.assertEqual(h.file_size, filestore.LEAF_SIZE * 2) | 1100 | self.assertEqual(h.file_size, filestore.LEAF_SIZE * 2) |
308 | 1066 | self.assertFalse(h.closed) | 1101 | self.assertFalse(h.closed) |
309 | 1067 | 1102 | ||
311 | 1068 | self.assertEqual(h.update(LEAVES[2]), LEAF_HASHES[60:90]) | 1103 | self.assertEqual(h.hash_leaf(leaf2), LEAF_HASHES[60:90]) |
312 | 1069 | self.assertEqual(h.leaf_index, 3) | 1104 | self.assertEqual(h.leaf_index, 3) |
313 | 1070 | self.assertEqual(h.file_size, sum(len(l) for l in LEAVES)) | 1105 | self.assertEqual(h.file_size, sum(len(l) for l in LEAVES)) |
314 | 1071 | self.assertTrue(h.closed) | 1106 | self.assertTrue(h.closed) |
315 | 1072 | 1107 | ||
316 | 1073 | def test_content_hash(self): | 1108 | def test_content_hash(self): |
317 | 1109 | leaf0 = filestore.Leaf(0, LEAVES[0]) | ||
318 | 1110 | leaf1 = filestore.Leaf(1, LEAVES[1]) | ||
319 | 1111 | leaf2 = filestore.Leaf(2, LEAVES[2]) | ||
320 | 1112 | |||
321 | 1074 | h = filestore.Hasher() | 1113 | h = filestore.Hasher() |
324 | 1075 | for l in LEAVES: | 1114 | for leaf in (leaf0, leaf1, leaf2): |
325 | 1076 | h.update(l) | 1115 | h.hash_leaf(leaf) |
326 | 1077 | self.assertEqual(h.content_hash(), CH) | 1116 | self.assertEqual(h.content_hash(), CH) |
327 | 1078 | 1117 | ||
328 | 1118 | # Test that Hasher.content_hash() sets closed = True | ||
329 | 1119 | h = filestore.Hasher() | ||
330 | 1120 | h.hash_leaf(leaf0) | ||
331 | 1121 | self.assertFalse(h.closed) | ||
332 | 1122 | h.content_hash() | ||
333 | 1123 | self.assertTrue(h.closed) | ||
334 | 1124 | |||
335 | 1079 | 1125 | ||
336 | 1080 | class TestFileStore(TestCase): | 1126 | class TestFileStore(TestCase): |
337 | 1081 | def test_init(self): | 1127 | def test_init(self): |
338 | @@ -1671,6 +1717,50 @@ | |||
339 | 1671 | self.assertFalse(path.exists(canonical)) | 1717 | self.assertFalse(path.exists(canonical)) |
340 | 1672 | self.assertTrue(path.isfile(corrupt)) | 1718 | self.assertTrue(path.isfile(corrupt)) |
341 | 1673 | 1719 | ||
342 | 1720 | def test_content_md5(self): | ||
343 | 1721 | tmp = TempDir() | ||
344 | 1722 | fs = filestore.FileStore(tmp.dir) | ||
345 | 1723 | |||
346 | 1724 | canonical = fs.path(ID) | ||
347 | 1725 | corrupt = fs.corrupt_path(ID) | ||
348 | 1726 | |||
349 | 1727 | # File doesn't exist | ||
350 | 1728 | with self.assertRaises(IOError) as cm: | ||
351 | 1729 | md5 = fs.content_md5(ID) | ||
352 | 1730 | self.assertEqual(cm.exception.errno, 2) | ||
353 | 1731 | |||
354 | 1732 | # File exists: | ||
355 | 1733 | fp = open(canonical, 'wb') | ||
356 | 1734 | for leaf in LEAVES: | ||
357 | 1735 | fp.write(leaf) | ||
358 | 1736 | fp.close() | ||
359 | 1737 | |||
360 | 1738 | self.assertEqual( | ||
361 | 1739 | fs.content_md5(ID), | ||
362 | 1740 | ('99ca2a74521ad7825768bbfe7fe0dc49', 'mcoqdFIa14JXaLv+f+DcSQ==') | ||
363 | 1741 | ) | ||
364 | 1742 | |||
365 | 1743 | # File exists and is corrupted | ||
366 | 1744 | fp = open(canonical, 'wb') | ||
367 | 1745 | for leaf in LEAVES: | ||
368 | 1746 | fp.write(leaf) | ||
369 | 1747 | fp.write(b'F') | ||
370 | 1748 | fp.close() | ||
371 | 1749 | c = filestore.hash_fp(open(canonical, 'rb')) | ||
372 | 1750 | self.assertNotEqual(c.id, ID) | ||
373 | 1751 | |||
374 | 1752 | self.assertTrue(path.isfile(canonical)) | ||
375 | 1753 | self.assertFalse(path.exists(corrupt)) | ||
376 | 1754 | |||
377 | 1755 | with self.assertRaises(filestore.FileIntegrityError) as cm: | ||
378 | 1756 | md5 = fs.content_md5(ID) | ||
379 | 1757 | self.assertEqual(cm.exception.id, ID) | ||
380 | 1758 | self.assertEqual(cm.exception.parent, tmp.dir) | ||
381 | 1759 | self.assertEqual(cm.exception.bad_id, c.id) | ||
382 | 1760 | |||
383 | 1761 | self.assertFalse(path.exists(canonical)) | ||
384 | 1762 | self.assertTrue(path.isfile(corrupt)) | ||
385 | 1763 | |||
386 | 1674 | def test_remove(self): | 1764 | def test_remove(self): |
387 | 1675 | tmp = TempDir() | 1765 | tmp = TempDir() |
388 | 1676 | fs = filestore.FileStore(tmp.dir) | 1766 | fs = filestore.FileStore(tmp.dir) |
389 | @@ -1890,6 +1980,64 @@ | |||
390 | 1890 | self.assertTrue(path.isfile(corrupt)) | 1980 | self.assertTrue(path.isfile(corrupt)) |
391 | 1891 | self.assertEqual(open(corrupt, 'rb').read(), b'yup') | 1981 | self.assertEqual(open(corrupt, 'rb').read(), b'yup') |
392 | 1892 | 1982 | ||
393 | 1983 | def test_verify_and_move(self): | ||
394 | 1984 | # Test when it's all good | ||
395 | 1985 | tmp = TempDir() | ||
396 | 1986 | fs = filestore.FileStore(tmp.dir) | ||
397 | 1987 | src = fs.partial_path(ID) | ||
398 | 1988 | write_sample_file(src) | ||
399 | 1989 | src_fp = open(src, 'rb') | ||
400 | 1990 | self.assertFalse(fs.exists(ID)) | ||
401 | 1991 | self.assertEqual(fs.verify_and_move(src_fp, ID), CH) | ||
402 | 1992 | self.assertTrue(fs.exists(ID)) | ||
403 | 1993 | self.assertEqual(fs.verify(ID), CH) | ||
404 | 1994 | |||
405 | 1995 | # Test when tmp_fp.name != partial_path(_id) | ||
406 | 1996 | tmp = TempDir() | ||
407 | 1997 | fs = filestore.FileStore(tmp.dir) | ||
408 | 1998 | src = path.join(fs.tmp, 'foo.mov') | ||
409 | 1999 | write_sample_file(src) | ||
410 | 2000 | src_fp = open(src, 'rb') | ||
411 | 2001 | with self.assertRaises(ValueError) as cm: | ||
412 | 2002 | fs.verify_and_move(src_fp, ID) | ||
413 | 2003 | self.assertEqual( | ||
414 | 2004 | str(cm.exception), | ||
415 | 2005 | 'bad partial_path() for {!r}: {!r}'.format(ID, src) | ||
416 | 2006 | ) | ||
417 | 2007 | self.assertFalse(fs.exists(ID)) | ||
418 | 2008 | |||
419 | 2009 | # Test when partial has wrong content | ||
420 | 2010 | tmp = TempDir() | ||
421 | 2011 | fs = filestore.FileStore(tmp.dir) | ||
422 | 2012 | src = fs.partial_path(ID) | ||
423 | 2013 | src_fp = open(src, 'wb') | ||
424 | 2014 | for leaf in LEAVES: | ||
425 | 2015 | src_fp.write(leaf) | ||
426 | 2016 | src_fp.write(b'F') | ||
427 | 2017 | src_fp.close() | ||
428 | 2018 | ch = filestore.hash_fp(open(src, 'rb')) | ||
429 | 2019 | self.assertNotEqual(ch.id, ID) | ||
430 | 2020 | src_fp = open(src, 'rb') | ||
431 | 2021 | with self.assertRaises(ValueError) as cm: | ||
432 | 2022 | fs.verify_and_move(src_fp, ID) | ||
433 | 2023 | self.assertEqual( | ||
434 | 2024 | str(cm.exception), | ||
435 | 2025 | 'expected {!r}, computed {!r}'.format(ID, ch.id) | ||
436 | 2026 | ) | ||
437 | 2027 | self.assertFalse(fs.exists(ID)) | ||
438 | 2028 | |||
439 | 2029 | def test_hash_and_move(self): | ||
440 | 2030 | tmp = TempDir() | ||
441 | 2031 | fs = filestore.FileStore(tmp.dir) | ||
442 | 2032 | |||
443 | 2033 | src = path.join(fs.tmp, 'foo.mov') | ||
444 | 2034 | write_sample_file(src) | ||
445 | 2035 | src_fp = open(src, 'rb') | ||
446 | 2036 | self.assertFalse(fs.exists(ID)) | ||
447 | 2037 | self.assertEqual(fs.hash_and_move(src_fp), CH) | ||
448 | 2038 | self.assertTrue(fs.exists(ID)) | ||
449 | 2039 | self.assertEqual(fs.verify(ID), CH) | ||
450 | 2040 | |||
451 | 1893 | def test_import_file(self): | 2041 | def test_import_file(self): |
452 | 1894 | tmp = TempDir() | 2042 | tmp = TempDir() |
453 | 1895 | src = tmp.join('movie.mov') | 2043 | src = tmp.join('movie.mov') |
Looks a lot simpler. Approved.