Merge lp:~jderose/dmedia/core-split into lp:dmedia
Status: Merged
Merged at revision: 517
Proposed branch: lp:~jderose/dmedia/core-split
Merge into: lp:dmedia
Diff against target: 1314 lines (+502/-239), 17 files modified:

- dmedia-service (+4/-4)
- dmedia/core.py (+0/-2)
- dmedia/extractor.py (+8/-20)
- dmedia/importer.py (+78/-56)
- dmedia/metastore.py (+114/-43)
- dmedia/schema.py (+33/-30)
- dmedia/server.py (+7/-3)
- dmedia/tests/test_extractor.py (+6/-18)
- dmedia/tests/test_importer.py (+126/-9)
- dmedia/tests/test_local.py (+2/-1)
- dmedia/tests/test_metastore.py (+94/-1)
- dmedia/tests/test_schema.py (+16/-39)
- dmedia/tests/test_server.py (+2/-1)
- dmedia/tests/test_transfers.py (+2/-2)
- dmedia/tests/test_verification.py (+9/-8)
- dmedia/verification.py (+1/-1)
- dmedia/views.py (+0/-1)

To merge this branch: bzr merge lp:~jderose/dmedia/core-split
Related bugs:
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| David Jordan | Approve | | |

Review via email: mp+135401@code.launchpad.net
Commit message
Description of the change
While working on this, I found a serious bug in the CouchDB ProxyApp:
https:/
So this merge fixes that: I wasn't including the query string (when present) in the rebuilt request-line, so this request:
POST /db/docid?
was being forwarded as:
POST /db/docid
which resulted in a conflict and changes not being replicated.
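The fix (in dmedia/server.py, visible in the preview diff below) just rejoins QUERY_STRING onto PATH_INFO when rebuilding the request target. A minimal sketch; the function name here is illustrative, and body/header handling is omitted:

```python
def rebuild_request_target(environ):
    # Rebuild the request target from a WSGI environ, keeping the
    # query string; previously only PATH_INFO was forwarded.
    path = environ['PATH_INFO']
    query = environ['QUERY_STRING']
    if query:
        path = '?'.join([path, query])
    return (environ['REQUEST_METHOD'], path)
```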
The main focus, though, was this bug: trimming the dmedia/file docs in dmedia-0 down to only their essential schema:
https:/
And now when you do an import, that's exactly what you get. There is also a new dmedia/log type of doc, and each time a file is imported, one of these log docs is saved. These log docs should never be updated; they store what was unique about the specific occasion on which the file was imported: the file name and mtime, the time of the event, the import_id, batch_id, and so on. If a duplicate file is imported again, that second import gets its own log doc, preserving all the details we need for rich auditing.
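Concretely, the trimmed core doc is what the new schema.create_file() builds; this is lifted from the preview diff below (ch is a filestore ContentHash):

```python
from microfiber import Attachment, encode_attachment

def create_file(timestamp, ch, stored, origin='user'):
    # Minimal 'dmedia/file' doc for dmedia-0: intrinsic facts only,
    # no extracted metadata; leaf_hashes travel as an attachment.
    leaf_hashes = Attachment('application/octet-stream', ch.leaf_hashes)
    return {
        '_id': ch.id,
        '_attachments': {
            'leaf_hashes': encode_attachment(leaf_hashes),
        },
        'type': 'dmedia/file',
        'time': timestamp,
        'atime': int(timestamp),
        'bytes': ch.file_size,
        'origin': origin,
        'stored': stored,
    }
```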
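Likewise, the new schema.create_log() from the diff shows exactly what one of these log docs records:

```python
import os
from microfiber import random_id

def create_log(timestamp, ch, file, **kw):
    doc = {
        '_id': ch.id[:4] + random_id()[4:],  # first 4 chars of the file ID, then random
        'type': 'dmedia/log',
        'time': timestamp,
        'file_id': ch.id,
        'bytes': ch.file_size,
        'dir': os.path.dirname(file.name),
        'name': os.path.basename(file.name),
        'mtime': file.mtime,
    }
    doc.update(kw)  # import_id, batch_id, machine_id, project_id
    return doc
```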
I also fixed some small issues with how the dmedia/file doc gets updated on a duplicate import. Previously we weren't preserving the file pinning, so that's now fixed, thanks to the new importer.
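The duplicate-import update now goes through a small importer.merge_stored() helper (again from the preview diff): copies and mtime are refreshed, any stale verified timestamp is dropped, and keys like pinned are left untouched:

```python
def merge_stored(old, new):
    # Merge per-store entries from *new* into doc['stored'] (*old*).
    for (key, value) in new.items():
        assert set(value) == set(['copies', 'mtime'])
        if key in old:
            old[key].update(value)          # refresh copies and mtime
            old[key].pop('verified', None)  # stale; force re-verification
        else:
            old[key] = value
```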
After thinking about it more, I decided *not* to include any extracted metadata in dmedia-0. Instead, we'll keep that just in the project databases, which is where Novacut and Dmedia currently expect this metadata anyway. I made a few small tweaks to the schema of the dmedia/file docs saved in the project databases, but all the same info is still there. I did, however, decide to no longer store the leaf_hashes redundantly in the project databases: it's better to download the leaf_hashes from a peer only once you're actually going to put a file into your library, and therefore have a corresponding doc in dmedia-0.
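For comparison, the project-database variant now starts with empty _attachments (no redundant leaf_hashes) and a meta dict for the extracted metadata; from schema.create_project_file() in the diff:

```python
import os

def create_project_file(timestamp, ch, file, origin='user'):
    # 'dmedia/file' doc as saved in a project database; no leaf_hashes.
    return {
        '_id': ch.id,
        '_attachments': {},
        'type': 'dmedia/file',
        'time': timestamp,
        'bytes': ch.file_size,
        'origin': origin,
        'ctime': file.mtime,
        'dir': os.path.dirname(file.name),
        'name': os.path.basename(file.name),
        'tags': {},
        'meta': {},
    }
```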
Some other small changes that made sense to include in this merge:
* extractor now uses microfiber's Attachment and encode_attachment()
* Hugely improved the performance of MetaStore.scan() and MetaStore.relink() by using Database.get_many() to grab 25 docs at a time (see the batching sketch after this list)
* Because of the above performance improvement, I turned the background worker back on (it was disabled for the 12.10 release for performance reasons)
* doc['stored'][store_id]['verified'] is now an int rather than a float (matching the schema.py and verification.py changes in the diff)
* removed the "user" design from dmedia-0 (it wasn't being used anyway, and the metadata it drew on is no longer in the dmedia/file docs in dmedia-0)
* ImportWorker now does extraction in its own thread so it's less likely to stall the read/hash/write train (a sketch of the queue wiring follows this list)
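The batching behind the MetaStore speedup above is the new metastore.relink_iter() from the diff; each batch of 25 stat objects is then resolved with a single Database.get_many() call:

```python
def relink_iter(fs, count=25):
    # Yield stat objects from FileStore *fs* in batches of *count*.
    buf = []
    for st in fs:
        buf.append(st)
        if len(buf) >= count:
            yield buf
            buf = []
    if buf:
        yield buf

# Usage inside MetaStore.relink(), roughly:
#   for buf in relink_iter(fs):
#       docs = self.db.get_many([st.id for st in buf])
```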
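And the ImportWorker threading is a plain bounded-queue producer/consumer. Here is a self-contained sketch of the pattern only; the names and the do_extraction() stand-in are illustrative, not the real dmedia code:

```python
from queue import Queue
from threading import Thread

def do_extraction(item):
    print('extracting', item)  # stand-in for extract()/merge_thumbnail()

extraction_queue = Queue(10)  # bounded, as in ImportWorker

def extractor():
    while True:
        item = extraction_queue.get()
        if item is None:  # sentinel put after the last file
            break
        do_extraction(item)

thread = Thread(target=extractor)
thread.start()
for item in ['a.mov', 'b.cr2']:
    # the read/hash/write loop only blocks if extraction falls 10 items behind
    extraction_queue.put(item)
extraction_queue.put(None)
thread.join()
```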
Preview Diff
1 | === modified file 'dmedia-service' |
2 | --- dmedia-service 2012-11-01 11:06:53 +0000 |
3 | +++ dmedia-service 2012-11-21 12:55:46 +0000 |
4 | @@ -173,7 +173,7 @@ |
5 | self.core.set_default_store('shared') |
6 | self.env_s = dumps(self.core.env, pretty=True) |
7 | log.info('Finished core startup in %.3f', time.time() - start) |
8 | - GObject.timeout_add(350, self.on_idle1) |
9 | + GObject.timeout_add(250, self.on_idle1) |
10 | |
11 | def on_idle1(self): |
12 | """ |
13 | @@ -183,7 +183,7 @@ |
14 | if self.couch.pki.user.key_file is not None: |
15 | self.peer = Browser(self, self.couch) |
16 | self.udisks.monitor() |
17 | - GObject.timeout_add(350, self.on_idle2) |
18 | + GObject.timeout_add(500, self.on_idle2) |
19 | |
20 | def on_idle2(self): |
21 | """ |
22 | @@ -191,7 +191,7 @@ |
23 | """ |
24 | log.info('[idle2 at time %.3f]', time.time() - start_time) |
25 | start_thread(self.core.init_project_views) |
26 | - GObject.timeout_add(12*1000, self.on_idle3) |
27 | + GObject.timeout_add(5000, self.on_idle3) |
28 | |
29 | def on_idle3(self): |
30 | """ |
31 | @@ -203,7 +203,7 @@ |
32 | port = env['port'] |
33 | self.avahi = Avahi(self.core.env, port, ssl_config) |
34 | self.avahi.run() |
35 | - #GObject.timeout_add(60*1000, self.on_idle4) |
36 | + GObject.timeout_add(15*1000, self.on_idle4) |
37 | |
38 | def on_idle4(self): |
39 | """ |
40 | |
41 | === modified file 'dmedia/core.py' |
42 | --- dmedia/core.py 2012-11-01 11:06:53 +0000 |
43 | +++ dmedia/core.py 2012-11-21 12:55:46 +0000 |
44 | @@ -274,10 +274,8 @@ |
45 | while True: |
46 | try: |
47 | fs = self.queue.get() |
48 | - start = time.time() |
49 | self.ms.scan(fs) |
50 | self.ms.relink(fs) |
51 | - log.info('%.3f to check %r', time.time() - start, fs) |
52 | except Exception as e: |
53 | log.exception('Error in background worker:') |
54 | |
55 | |
56 | === modified file 'dmedia/extractor.py' |
57 | --- dmedia/extractor.py 2012-05-04 03:15:23 +0000 |
58 | +++ dmedia/extractor.py 2012-11-21 12:55:46 +0000 |
59 | @@ -32,15 +32,13 @@ |
60 | from base64 import b64encode |
61 | import time |
62 | import calendar |
63 | -from collections import namedtuple |
64 | |
65 | from filestore import hash_fp |
66 | +from microfiber import Attachment, encode_attachment |
67 | |
68 | import dmedia |
69 | |
70 | |
71 | -Thumbnail = namedtuple('Thumbnail', 'content_type data') |
72 | - |
73 | dmedia_extract = 'dmedia-extract' |
74 | tree = path.dirname(path.dirname(path.abspath(dmedia.__file__))) |
75 | if path.isfile(path.join(tree, 'setup.py')): |
76 | @@ -267,7 +265,7 @@ |
77 | dst, |
78 | ] |
79 | check_call(cmd) |
80 | - return Thumbnail('image/jpeg', open(dst, 'rb').read()) |
81 | + return Attachment('image/jpeg', open(dst, 'rb').read()) |
82 | |
83 | |
84 | def thumbnail_video(src, tmp): |
85 | @@ -294,11 +292,14 @@ |
86 | cmd = [ |
87 | 'ufraw-batch', |
88 | '--embedded-image', |
89 | + '--noexif', |
90 | + '--size', str(SIZE), |
91 | + '--compression', '90', |
92 | '--output', dst, |
93 | src, |
94 | ] |
95 | check_call(cmd) |
96 | - return thumbnail_image(dst, tmp) |
97 | + return Attachment('image/jpeg', open(dst, 'rb').read()) |
98 | |
99 | |
100 | thumbnailers = { |
101 | @@ -332,14 +333,6 @@ |
102 | shutil.rmtree(tmp) |
103 | |
104 | |
105 | -def to_attachment(thm): |
106 | - assert isinstance(thm, Thumbnail) |
107 | - return { |
108 | - 'content_type': thm.content_type, |
109 | - 'data': b64encode(thm.data).decode('utf-8'), |
110 | - } |
111 | - |
112 | - |
113 | def get_thumbnail_func(doc): |
114 | media = doc.get('media') |
115 | if media not in ('video', 'image'): |
116 | @@ -351,7 +344,6 @@ |
117 | return thumbnail_image |
118 | |
119 | |
120 | - |
121 | def merge_thumbnail(src, doc): |
122 | func = get_thumbnail_func(doc) |
123 | if func is None: |
124 | @@ -359,10 +351,6 @@ |
125 | thm = wrap_thumbnail_func(func, src) |
126 | if thm is None: |
127 | return False |
128 | - doc['_attachments']['thumbnail'] = to_attachment(thm) |
129 | + doc['_attachments']['thumbnail'] = encode_attachment(thm) |
130 | return True |
131 | - |
132 | - |
133 | - |
134 | - |
135 | - |
136 | + |
137 | |
138 | === modified file 'dmedia/importer.py' |
139 | --- dmedia/importer.py 2012-11-01 11:06:53 +0000 |
140 | +++ dmedia/importer.py 2012-11-21 12:55:46 +0000 |
141 | @@ -35,10 +35,12 @@ |
142 | import logging |
143 | import mimetypes |
144 | import shutil |
145 | +from queue import Queue |
146 | |
147 | -import microfiber |
148 | +from microfiber import NotFound, has_attachment, encode_attachment |
149 | from filestore import FileStore, scandir, batch_import_iter, statvfs |
150 | |
151 | +from dmedia.parallel import start_thread |
152 | from dmedia.util import get_project_db |
153 | from dmedia.units import bytes10 |
154 | from dmedia import workers, schema |
155 | @@ -159,6 +161,16 @@ |
156 | pass |
157 | |
158 | |
159 | +def merge_stored(old, new): |
160 | + for (key, value) in new.items(): |
161 | + assert set(value) == set(['copies', 'mtime']) |
162 | + if key in old: |
163 | + old[key].update(value) |
164 | + old[key].pop('verified', None) |
165 | + else: |
166 | + old[key] = value |
167 | + |
168 | + |
169 | class ImportWorker(workers.CouchWorker): |
170 | def __init__(self, env, q, key, args): |
171 | super().__init__(env, q, key, args) |
172 | @@ -169,6 +181,7 @@ |
173 | self.extract = self.env.get('extract', True) |
174 | self.project = get_project_db(self.env['project_id'], self.env) |
175 | self.project.ensure() |
176 | + self.extraction_queue = Queue(10) |
177 | |
178 | def execute(self, basedir, extra=None): |
179 | self.extra = extra |
180 | @@ -218,23 +231,38 @@ |
181 | return stores |
182 | |
183 | def import_all(self): |
184 | + self.thumbnail = None |
185 | + extractor = start_thread(self.extractor) |
186 | stores = self.get_filestores() |
187 | try: |
188 | - for (status, file, doc) in self.import_iter(*stores): |
189 | + for (status, file, ch) in self.import_iter(*stores): |
190 | self.doc['stats'][status]['count'] += 1 |
191 | self.doc['stats'][status]['bytes'] += file.size |
192 | self.doc['files'][file.name]['status'] = status |
193 | - if doc is not None: |
194 | - self.db.save(doc) |
195 | - self.doc['files'][file.name]['id'] = doc['_id'] |
196 | + if ch is not None: |
197 | + self.doc['files'][file.name]['id'] = ch.id |
198 | self.doc['time_end'] = time.time() |
199 | self.doc['rate'] = get_rate(self.doc) |
200 | finally: |
201 | self.db.save(self.doc) |
202 | + self.extraction_queue.put(None) |
203 | + extractor.join() |
204 | + if self.thumbnail: |
205 | + self.doc['_attachments'] = { |
206 | + 'thumbnail': encode_attachment(self.thumbnail) |
207 | + } |
208 | + self.db.save(self.doc) |
209 | + del self.doc['_rev'] |
210 | + self.project.post(self.doc) |
211 | self.emit('finished', self.id, self.doc['stats']) |
212 | |
213 | def import_iter(self, *filestores): |
214 | - need_thumbnail = True |
215 | + common = { |
216 | + 'import_id': self.id, |
217 | + 'batch_id': self.env.get('batch_id'), |
218 | + 'machine_id': self.env.get('machine_id'), |
219 | + 'project_id': self.env.get('project_id'), |
220 | + } |
221 | for (file, ch) in batch_import_iter(self.batch, *filestores, |
222 | callback=self.progress_callback |
223 | ): |
224 | @@ -242,49 +270,12 @@ |
225 | assert file.size == 0 |
226 | yield ('empty', file, None) |
227 | continue |
228 | - |
229 | - common = { |
230 | - 'import': { |
231 | - 'import_id': self.id, |
232 | - 'machine_id': self.env.get('machine_id'), |
233 | - 'batch_id': self.env.get('batch_id'), |
234 | - 'project_id': self.env.get('project_id'), |
235 | - 'src': file.name, |
236 | - 'mtime': file.mtime, |
237 | - }, |
238 | - 'meta': {}, |
239 | - 'ctime': file.mtime, |
240 | - 'name': path.basename(file.name), |
241 | - } |
242 | - ext = normalize_ext(file.name) |
243 | - if ext: |
244 | - common['ext'] = ext |
245 | - extract(file.name, common) |
246 | - |
247 | - # Project doc |
248 | - try: |
249 | - doc = self.project.get(ch.id) |
250 | - except microfiber.NotFound: |
251 | - doc = schema.create_project_file( |
252 | - ch.id, ch.file_size, ch.leaf_hashes |
253 | - ) |
254 | - doc.update(common) |
255 | - merge_thumbnail(file.name, doc) |
256 | - log.info('adding to %r', self.project) |
257 | - self.project.save(doc) |
258 | - if need_thumbnail and 'thumbnail' in doc['_attachments']: |
259 | - (content_type, data) = self.project.get_att(ch.id, 'thumbnail') |
260 | - self.db.save(self.doc) |
261 | - self.db.put_att(content_type, data, self.id, 'thumbnail', |
262 | - rev=self.doc['_rev'] |
263 | - ) |
264 | - self.doc = self.db.get(self.id) |
265 | - self.emit('import_thumbnail', self.id, ch.id) |
266 | - need_thumbnail = False |
267 | - |
268 | - # Core doc |
269 | + timestamp = time.time() |
270 | + self.extraction_queue.put((timestamp, file, ch)) |
271 | + log_doc = schema.create_log(timestamp, ch, file, **common) |
272 | stored = dict( |
273 | - (fs.id, |
274 | + ( |
275 | + fs.id, |
276 | { |
277 | 'copies': fs.copies, |
278 | 'mtime': fs.stat(ch.id).mtime, |
279 | @@ -294,14 +285,15 @@ |
280 | ) |
281 | try: |
282 | doc = self.db.get(ch.id) |
283 | - doc['stored'].update(stored) |
284 | - yield ('duplicate', file, doc) |
285 | - except microfiber.NotFound: |
286 | - doc = schema.create_file( |
287 | - ch.id, ch.file_size, ch.leaf_hashes, stored |
288 | - ) |
289 | - doc.update(common) |
290 | - yield ('new', file, doc) |
291 | + doc['origin'] = 'user' |
292 | + doc['atime'] = int(timestamp) |
293 | + merge_stored(doc['stored'], stored) |
294 | + self.db.save_many([log_doc, doc]) |
295 | + yield ('duplicate', file, ch) |
296 | + except NotFound: |
297 | + doc = schema.create_file(timestamp, ch, stored) |
298 | + self.db.save_many([log_doc, doc]) |
299 | + yield ('new', file, ch) |
300 | |
301 | def progress_callback(self, count, size): |
302 | self.emit('progress', self.id, |
303 | @@ -309,6 +301,36 @@ |
304 | size, self.batch.size |
305 | ) |
306 | |
307 | + def extractor(self): |
308 | + try: |
309 | + need_thumbnail = True |
310 | + common = { |
311 | + 'import_id': self.id, |
312 | + 'batch_id': self.env.get('batch_id'), |
313 | + 'machine_id': self.env.get('machine_id'), |
314 | + } |
315 | + while True: |
316 | + item = self.extraction_queue.get() |
317 | + if item is None: |
318 | + break |
319 | + (timestamp, file, ch) = item |
320 | + try: |
321 | + doc = self.project.get(ch.id) |
322 | + except NotFound: |
323 | + doc = schema.create_project_file(timestamp, ch, file) |
324 | + ext = normalize_ext(file.name) |
325 | + if ext: |
326 | + doc['ext'] = ext |
327 | + extract(file.name, doc) |
328 | + merge_thumbnail(file.name, doc) |
329 | + doc.update(common) |
330 | + self.project.save(doc) |
331 | + if need_thumbnail and has_attachment(doc, 'thumbnail'): |
332 | + need_thumbnail = False |
333 | + self.thumbnail = self.project.get_att(ch.id, 'thumbnail') |
334 | + self.emit('import_thumbnail', self.id, ch.id) |
335 | + except Exception: |
336 | + log.exception('Error in extractor thread:') |
337 | |
338 | |
339 | class ImportManager(workers.CouchManager): |
340 | |
341 | === modified file 'dmedia/metastore.py' |
342 | --- dmedia/metastore.py 2012-07-01 12:50:50 +0000 |
343 | +++ dmedia/metastore.py 2012-11-21 12:55:46 +0000 |
344 | @@ -28,7 +28,7 @@ |
345 | import logging |
346 | |
347 | from filestore import CorruptFile, FileNotFound, check_root_hash |
348 | -from microfiber import NotFound |
349 | +from microfiber import NotFound, id_slice_iter |
350 | |
351 | from .util import get_db |
352 | |
353 | @@ -108,14 +108,17 @@ |
354 | |
355 | |
356 | def mark_mismatch(doc, fs): |
357 | + """ |
358 | + Update mtime and copies, delete verified, preserve pinned. |
359 | + """ |
360 | _id = doc['_id'] |
361 | stored = get_dict(doc, 'stored') |
362 | - new = { |
363 | - 'mtime': fs.stat(_id).mtime, |
364 | - 'copies': 0, |
365 | - 'verified': 0, |
366 | - } |
367 | - update(stored, fs.id, new) |
368 | + value = get_dict(stored, fs.id) |
369 | + value.update( |
370 | + mtime=fs.stat(_id).mtime, |
371 | + copies=0, |
372 | + ) |
373 | + value.pop('verified', None) |
374 | |
375 | |
376 | class VerifyContext: |
377 | @@ -154,10 +157,13 @@ |
378 | if exc_type is None: |
379 | return |
380 | if issubclass(exc_type, FileNotFound): |
381 | + log.warning('%s is not in %r', self.doc['_id'], self.fs) |
382 | remove_from_stores(self.doc, self.fs) |
383 | elif issubclass(exc_type, CorruptFile): |
384 | + log.warning('%s has wrong size in %r', self.doc['_id'], self.fs) |
385 | mark_corrupt(self.doc, self.fs, time.time()) |
386 | elif issubclass(exc_type, MTimeMismatch): |
387 | + log.warning('%s has wrong mtime in %r', self.doc['_id'], self.fs) |
388 | mark_mismatch(self.doc, self.fs) |
389 | else: |
390 | return False |
391 | @@ -165,6 +171,17 @@ |
392 | return True |
393 | |
394 | |
395 | +def relink_iter(fs, count=25): |
396 | + buf = [] |
397 | + for st in fs: |
398 | + buf.append(st) |
399 | + if len(buf) >= count: |
400 | + yield buf |
401 | + buf = [] |
402 | + if buf: |
403 | + yield buf |
404 | + |
405 | + |
406 | class MetaStore: |
407 | def __init__(self, db): |
408 | self.db = db |
409 | @@ -172,45 +189,99 @@ |
410 | def __repr__(self): |
411 | return '{}({!r})'.format(self.__class__.__name__, self.db) |
412 | |
413 | + def scan(self, fs): |
414 | + """ |
415 | + Make sure files we expect to be in the file-store *fs* actually are. |
416 | + |
417 | + A fundamental design tenet of Dmedia is that it doesn't particularly |
418 | + trust its metadata, and instead does frequent reality checks. This |
419 | + allows Dmedia to work even though removable storage is constantly |
420 | + "offline". In other distributed file-systems, this is usually called |
421 | + being in a "network-partitioned" state. |
422 | + |
423 | + Dmedia deals with removable storage via a quickly decaying confidence |
424 | + in its metadata. If a removable drive hasn't been connected longer |
425 | + than some threshold, Dmedia will update all those copies to count for |
426 | + zero durability. |
427 | + |
428 | + And whenever a removable drive (or any drive for that matter) is |
429 | + connected, Dmedia immediately checks to see what files are actually on |
430 | + the drive, and whether they have good integrity. |
431 | + |
432 | + `MetaStore.scan()` is the most important reality check that Dmedia does |
433 | + because it's fast and can therefore be done quite often. Thousands of |
434 | + files can be scanned in a few seconds. |
435 | + |
436 | + The scan ensures that for every file expected in this file-store, the |
437 | + file exists, has the correct size, and the expected mtime. |
438 | + |
439 | + If the file doesn't exist in this file-store, its store_id is deleted |
440 | + from doc['stored'] and the doc is saved. |
441 | + |
442 | + If the file has the wrong size, it's moved into the corrupt location in |
443 | + the file-store. Then the doc is updated accordingly marking the file as |
444 | + being corrupt in this file-store, and the doc is saved. |
445 | + |
446 | + If the file doesn't have the expected mtime in this file-store, this |
447 | + copy gets downgraded to zero copies worth of durability, and the last |
448 | + verification timestamp is deleted, if present. This will put the file |
449 | + first in line for full content-hash verification. If the verification |
450 | + passes, the durability is raised back to the appropriate number of |
451 | + copies. |
452 | + |
453 | + :param fs: a `FileStore` instance |
454 | + """ |
455 | + start = time.time() |
456 | + log.info('Scanning FileStore %s at %r', fs.id, fs.parentdir) |
457 | + rows = self.db.view('file', 'stored', key=fs.id)['rows'] |
458 | + for ids in id_slice_iter(rows): |
459 | + for doc in self.db.get_many(ids): |
460 | + _id = doc['_id'] |
461 | + with ScanContext(self.db, fs, doc): |
462 | + st = fs.stat(_id) |
463 | + if st.size != doc.get('bytes'): |
464 | + src_fp = open(st.name, 'rb') |
465 | + raise fs.move_to_corrupt(src_fp, _id, |
466 | + file_size=doc['bytes'], |
467 | + bad_file_size=st.size, |
468 | + ) |
469 | + stored = get_dict(doc, 'stored') |
470 | + s = get_dict(stored, fs.id) |
471 | + if st.mtime != s['mtime']: |
472 | + raise MTimeMismatch() |
473 | + # Update the atime for the dmedia/store doc |
474 | + try: |
475 | + doc = self.db.get(fs.id) |
476 | + assert doc['type'] == 'dmedia/store' |
477 | + doc['atime'] = int(time.time()) |
478 | + self.db.save(doc) |
479 | + log.info('Updated FileStore %s atime to %r', fs.id, doc['atime']) |
480 | + except NotFound: |
481 | + log.warning('No doc for FileStore %s', fs.id) |
482 | + log.info('%.3f to scan %r', time.time() - start, fs) |
483 | + |
484 | def relink(self, fs): |
485 | + """ |
486 | + Find known files that we didn't expect in `FileStore` *fs*. |
487 | + """ |
488 | + start = time.time() |
489 | log.info('Relinking FileStore %r at %r', fs.id, fs.parentdir) |
490 | - for st in fs: |
491 | - try: |
492 | - doc = self.db.get(st.id) |
493 | - except NotFound: |
494 | - continue |
495 | - stored = get_dict(doc, 'stored') |
496 | - s = get_dict(stored, fs.id) |
497 | - if s.get('mtime') == st.mtime: |
498 | - continue |
499 | - new = { |
500 | - 'mtime': st.mtime, |
501 | - 'verified': 0, |
502 | - 'copies': (0 if 'mtime' in s else fs.copies), |
503 | - } |
504 | - s.update(new) |
505 | - self.db.save(doc) |
506 | - |
507 | - def scan(self, fs): |
508 | - log.info('Scanning FileStore %r at %r', fs.id, fs.parentdir) |
509 | - v = self.db.view('file', 'stored', key=fs.id, reduce=False) |
510 | - for row in v['rows']: |
511 | - _id = row['id'] |
512 | - doc = self.db.get(_id) |
513 | - leaf_hashes = self.db.get_att(_id, 'leaf_hashes')[1] |
514 | - check_root_hash(_id, doc['bytes'], leaf_hashes) |
515 | - with ScanContext(self.db, fs, doc): |
516 | - st = fs.stat(_id) |
517 | - if st.size != doc['bytes']: |
518 | - src_fp = open(st.name, 'rb') |
519 | - raise fs.move_to_corrupt(src_fp, _id, |
520 | - file_size=doc['bytes'], |
521 | - bad_file_size=st.size, |
522 | - ) |
523 | + for buf in relink_iter(fs): |
524 | + docs = self.db.get_many([st.id for st in buf]) |
525 | + for (st, doc) in zip(buf, docs): |
526 | + if doc is None: |
527 | + continue |
528 | stored = get_dict(doc, 'stored') |
529 | - s = get_dict(stored, fs.id) |
530 | - if st.mtime != s['mtime']: |
531 | - raise MTimeMismatch() |
532 | + value = get_dict(stored, fs.id) |
533 | + if value: |
534 | + continue |
535 | + log.info('Relinking %s in %r', st.id, fs) |
536 | + value.update( |
537 | + mtime=st.mtime, |
538 | + copies=fs.copies, |
539 | + ) |
540 | + self.db.save(doc) |
541 | + log.info('%.3f to relink %r', time.time() - start, fs) |
542 | |
543 | def remove(self, fs, _id): |
544 | doc = self.db.get(_id) |
545 | |
546 | === modified file 'dmedia/schema.py' |
547 | --- dmedia/schema.py 2012-11-14 00:37:47 +0000 |
548 | +++ dmedia/schema.py 2012-11-21 12:55:46 +0000 |
549 | @@ -245,9 +245,10 @@ |
550 | import re |
551 | import time |
552 | import socket |
553 | +import os |
554 | |
555 | from filestore import DIGEST_B32LEN, B32ALPHABET, TYPE_ERROR |
556 | -from microfiber import random_id, RANDOM_B32LEN |
557 | +from microfiber import random_id, RANDOM_B32LEN, encode_attachment, Attachment |
558 | |
559 | |
560 | # schema-compatibility version: |
561 | @@ -710,7 +711,7 @@ |
562 | _check(doc, ['stored', key, 'mtime'], (int, float), |
563 | (_at_least, 0), |
564 | ) |
565 | - _check_if_exists(doc, ['stored', key, 'verified'], (int, float), |
566 | + _check_if_exists(doc, ['stored', key, 'verified'], int, |
567 | (_at_least, 0), |
568 | ) |
569 | _check_if_exists(doc, ['stored', key, 'pinned'], bool, |
570 | @@ -739,22 +740,12 @@ |
571 | (_at_least, 0), |
572 | ) |
573 | |
574 | - # 'ext' like 'mov' |
575 | - _check_if_exists(doc, ['ext'], str, |
576 | - (_matches, EXT_PAT), |
577 | - ) |
578 | - |
579 | - # 'content_type' like 'video/quicktime' |
580 | - _check_if_exists(doc, ['content_type'], str) |
581 | - |
582 | # proxy_of |
583 | if doc['origin'] == 'proxy': |
584 | _check(doc, ['proxy_of'], str, |
585 | _intrinsic_id, |
586 | ) |
587 | |
588 | - check_file_optional(doc) |
589 | - |
590 | |
591 | def check_file_optional(doc): |
592 | |
593 | @@ -825,45 +816,57 @@ |
594 | ####################################################### |
595 | # Functions for creating specific types of dmedia docs: |
596 | |
597 | -def create_file(_id, file_size, leaf_hashes, stored, origin='user'): |
598 | + |
599 | +def create_log(timestamp, ch, file, **kw): |
600 | + doc = { |
601 | + '_id': ch.id[:4] + random_id()[4:], |
602 | + 'type': 'dmedia/log', |
603 | + 'time': timestamp, |
604 | + 'file_id': ch.id, |
605 | + 'bytes': ch.file_size, |
606 | + 'dir': os.path.dirname(file.name), |
607 | + 'name': os.path.basename(file.name), |
608 | + 'mtime': file.mtime, |
609 | + } |
610 | + doc.update(kw) |
611 | + return doc |
612 | + |
613 | + |
614 | +def create_file(timestamp, ch, stored, origin='user'): |
615 | """ |
616 | Create a minimal 'dmedia/file' document. |
617 | """ |
618 | - timestamp = time.time() |
619 | + leaf_hashes = Attachment('application/octet-stream', ch.leaf_hashes) |
620 | return { |
621 | - '_id': _id, |
622 | + '_id': ch.id, |
623 | '_attachments': { |
624 | - 'leaf_hashes': { |
625 | - 'data': b64encode(leaf_hashes).decode('utf-8'), |
626 | - 'content_type': 'application/octet-stream', |
627 | - } |
628 | + 'leaf_hashes': encode_attachment(leaf_hashes), |
629 | }, |
630 | 'type': 'dmedia/file', |
631 | 'time': timestamp, |
632 | 'atime': int(timestamp), |
633 | - 'bytes': file_size, |
634 | + 'bytes': ch.file_size, |
635 | 'origin': origin, |
636 | 'stored': stored, |
637 | } |
638 | |
639 | |
640 | -def create_project_file(_id, file_size, leaf_hashes, origin='user'): |
641 | +def create_project_file(timestamp, ch, file, origin='user'): |
642 | """ |
643 | Create a minimal 'dmedia/file' document. |
644 | """ |
645 | return { |
646 | - '_id': _id, |
647 | - '_attachments': { |
648 | - 'leaf_hashes': { |
649 | - 'data': b64encode(leaf_hashes).decode('utf-8'), |
650 | - 'content_type': 'application/octet-stream', |
651 | - } |
652 | - }, |
653 | + '_id': ch.id, |
654 | + '_attachments': {}, |
655 | 'type': 'dmedia/file', |
656 | - 'time': time.time(), |
657 | - 'bytes': file_size, |
658 | + 'time': timestamp, |
659 | + 'bytes': ch.file_size, |
660 | 'origin': origin, |
661 | + 'ctime': file.mtime, |
662 | + 'dir': os.path.dirname(file.name), |
663 | + 'name': os.path.basename(file.name), |
664 | 'tags': {}, |
665 | + 'meta': {}, |
666 | } |
667 | |
668 | |
669 | |
670 | === modified file 'dmedia/server.py' |
671 | --- dmedia/server.py 2012-10-24 23:23:30 +0000 |
672 | +++ dmedia/server.py 2012-11-21 12:55:46 +0000 |
673 | @@ -31,7 +31,7 @@ |
674 | import logging |
675 | |
676 | from filestore import DIGEST_B32LEN, B32ALPHABET, LEAF_SIZE |
677 | -from microfiber import dumps, basic_auth_header, CouchBase |
678 | +from microfiber import dumps, basic_auth_header, CouchBase, dumps |
679 | |
680 | import dmedia |
681 | from dmedia import __version__ |
682 | @@ -46,7 +46,7 @@ |
683 | |
684 | def iter_headers(environ): |
685 | for (key, value) in environ.items(): |
686 | - if key in ('CONTENT_LENGHT', 'CONTENT_TYPE'): |
687 | + if key in ('CONTENT_LENGTH', 'CONTENT_TYPE'): |
688 | yield (key.replace('_', '-').lower(), value) |
689 | elif key.startswith('HTTP_'): |
690 | yield (key[5:].replace('_', '-').lower(), value) |
691 | @@ -58,7 +58,11 @@ |
692 | body = environ['wsgi.input'].read() |
693 | else: |
694 | body = None |
695 | - return (environ['REQUEST_METHOD'], environ['PATH_INFO'], body, headers) |
696 | + path = environ['PATH_INFO'] |
697 | + query = environ['QUERY_STRING'] |
698 | + if query: |
699 | + path = '?'.join([path, query]) |
700 | + return (environ['REQUEST_METHOD'], path, body, headers) |
701 | |
702 | |
703 | def get_slice(environ): |
704 | |
705 | === modified file 'dmedia/tests/test_extractor.py' |
706 | --- dmedia/tests/test_extractor.py 2012-08-04 01:26:03 +0000 |
707 | +++ dmedia/tests/test_extractor.py 2012-11-21 12:55:46 +0000 |
708 | @@ -28,7 +28,7 @@ |
709 | from os import path |
710 | from subprocess import CalledProcessError |
711 | |
712 | -from microfiber import random_id |
713 | +from microfiber import random_id, Attachment |
714 | |
715 | from .base import TempDir, SampleFilesTestCase, MagicLanternTestCase |
716 | |
717 | @@ -612,12 +612,11 @@ |
718 | } |
719 | ) |
720 | |
721 | - |
722 | def test_thumbnail_video(self): |
723 | # Test with sample_mov from 5D Mark II: |
724 | tmp = TempDir() |
725 | t = extractor.thumbnail_video(self.mov, tmp.dir) |
726 | - self.assertIsInstance(t, extractor.Thumbnail) |
727 | + self.assertIsInstance(t, Attachment) |
728 | self.assertEqual(t.content_type, 'image/jpeg') |
729 | self.assertIsInstance(t.data, bytes) |
730 | self.assertGreater(len(t.data), 5000) |
731 | @@ -644,7 +643,7 @@ |
732 | # Test with sample_thm from 5D Mark II: |
733 | tmp = TempDir() |
734 | t = extractor.thumbnail_image(self.thm, tmp.dir) |
735 | - self.assertIsInstance(t, extractor.Thumbnail) |
736 | + self.assertIsInstance(t, Attachment) |
737 | self.assertEqual(t.content_type, 'image/jpeg') |
738 | self.assertIsInstance(t.data, bytes) |
739 | self.assertGreater(len(t.data), 5000) |
740 | @@ -667,7 +666,7 @@ |
741 | def test_create_thumbnail(self): |
742 | # Test with sample_mov from 5D Mark II: |
743 | t = extractor.create_thumbnail(self.mov, 'mov') |
744 | - self.assertIsInstance(t, extractor.Thumbnail) |
745 | + self.assertIsInstance(t, Attachment) |
746 | self.assertEqual(t.content_type, 'image/jpeg') |
747 | self.assertIsInstance(t.data, bytes) |
748 | self.assertGreater(len(t.data), 5000) |
749 | @@ -690,7 +689,7 @@ |
750 | def test_create_thumbnail(self): |
751 | # Test with sample_mov from 5D Mark II: |
752 | t = extractor.create_thumbnail(self.mov, 'mov') |
753 | - self.assertIsInstance(t, extractor.Thumbnail) |
754 | + self.assertIsInstance(t, Attachment) |
755 | self.assertEqual(t.content_type, 'image/jpeg') |
756 | self.assertIsInstance(t.data, bytes) |
757 | self.assertGreater(len(t.data), 5000) |
758 | @@ -710,15 +709,6 @@ |
759 | nope = tmp.join('nope.mov') |
760 | self.assertIsNone(extractor.create_thumbnail(nope, 'mov')) |
761 | |
762 | - def test_to_attachment(self): |
763 | - data = os.urandom(2000) |
764 | - thm = extractor.Thumbnail('image/png', data) |
765 | - d = extractor.to_attachment(thm) |
766 | - self.assertIsInstance(d, dict) |
767 | - self.assertEqual(set(d), set(['content_type', 'data'])) |
768 | - self.assertEqual(d['content_type'], 'image/png') |
769 | - self.assertEqual(d['data'], b64encode(data).decode('utf-8')) |
770 | - |
771 | def test_get_thumbnail_func(self): |
772 | f = extractor.get_thumbnail_func |
773 | self.assertIsNone(f({})) |
774 | @@ -833,6 +823,4 @@ |
775 | extensions, |
776 | set(extractor.NO_EXTRACT) |
777 | ) |
778 | - |
779 | - |
780 | - |
781 | + |
782 | |
783 | === modified file 'dmedia/tests/test_importer.py' |
784 | --- dmedia/tests/test_importer.py 2012-08-06 11:00:34 +0000 |
785 | +++ dmedia/tests/test_importer.py 2012-11-21 12:55:46 +0000 |
786 | @@ -200,6 +200,123 @@ |
787 | (6, 8, 10, 12) |
788 | ) |
789 | |
790 | + def test_merge_stored(self): |
791 | + id1 = random_id() |
792 | + id2 = random_id() |
793 | + id3 = random_id() |
794 | + ts1 = time.time() |
795 | + ts2 = time.time() - 2.5 |
796 | + ts3 = time.time() - 5 |
797 | + new = { |
798 | + id1: { |
799 | + 'copies': 2, |
800 | + 'mtime': ts1, |
801 | + }, |
802 | + id2: { |
803 | + 'copies': 1, |
804 | + 'mtime': ts2, |
805 | + }, |
806 | + } |
807 | + |
808 | + old = {} |
809 | + self.assertIsNone(importer.merge_stored(old, deepcopy(new))) |
810 | + self.assertEqual(old, new) |
811 | + |
812 | + old = { |
813 | + id3: { |
814 | + 'copies': 1, |
815 | + 'mtime': ts3, |
816 | + 'verified': int(ts3 + 100), |
817 | + } |
818 | + } |
819 | + self.assertIsNone(importer.merge_stored(old, deepcopy(new))) |
820 | + self.assertEqual(old, |
821 | + { |
822 | + id1: { |
823 | + 'copies': 2, |
824 | + 'mtime': ts1, |
825 | + }, |
826 | + id2: { |
827 | + 'copies': 1, |
828 | + 'mtime': ts2, |
829 | + }, |
830 | + id3: { |
831 | + 'copies': 1, |
832 | + 'mtime': ts3, |
833 | + 'verified': int(ts3 + 100), |
834 | + } |
835 | + } |
836 | + ) |
837 | + |
838 | + old = { |
839 | + id1: { |
840 | + 'copies': 1, |
841 | + 'mtime': ts1 - 100, |
842 | + 'verified': ts1 - 50, # Should be removed |
843 | + }, |
844 | + id2: { |
845 | + 'copies': 2, |
846 | + 'mtime': ts2 - 200, |
847 | + 'pinned': True, # Should be preserved |
848 | + }, |
849 | + } |
850 | + self.assertIsNone(importer.merge_stored(old, deepcopy(new))) |
851 | + self.assertEqual(old, |
852 | + { |
853 | + id1: { |
854 | + 'copies': 2, |
855 | + 'mtime': ts1, |
856 | + }, |
857 | + id2: { |
858 | + 'copies': 1, |
859 | + 'mtime': ts2, |
860 | + 'pinned': True, |
861 | + }, |
862 | + } |
863 | + ) |
864 | + |
865 | + old = { |
866 | + id1: { |
867 | + 'copies': 1, |
868 | + 'mtime': ts1 - 100, |
869 | + 'pinned': True, # Should be preserved |
870 | + 'verified': ts1 - 50, # Should be removed |
871 | + }, |
872 | + id2: { |
873 | + 'copies': 2, |
874 | + 'mtime': ts2 - 200, |
875 | + 'verified': ts1 - 50, # Should be removed |
876 | + 'pinned': True, # Should be preserved |
877 | + }, |
878 | + id3: { |
879 | + 'copies': 1, |
880 | + 'mtime': ts3, |
881 | + 'verified': int(ts3 + 100), |
882 | + 'pinned': True, |
883 | + }, |
884 | + } |
885 | + self.assertIsNone(importer.merge_stored(old, deepcopy(new))) |
886 | + self.assertEqual(old, |
887 | + { |
888 | + id1: { |
889 | + 'copies': 2, |
890 | + 'mtime': ts1, |
891 | + 'pinned': True, |
892 | + }, |
893 | + id2: { |
894 | + 'copies': 1, |
895 | + 'mtime': ts2, |
896 | + 'pinned': True, |
897 | + }, |
898 | + id3: { |
899 | + 'copies': 1, |
900 | + 'mtime': ts3, |
901 | + 'verified': int(ts3 + 100), |
902 | + 'pinned': True, |
903 | + }, |
904 | + } |
905 | + ) |
906 | + |
907 | |
908 | class ImportCase(CouchCase): |
909 | |
910 | @@ -341,9 +458,9 @@ |
911 | for (file, ch) in result: |
912 | doc = self.db.get(ch.id) |
913 | schema.check_file(doc) |
914 | - self.assertEqual(doc['import']['import_id'], inst.id) |
915 | - self.assertEqual(doc['import']['batch_id'], self.batch_id) |
916 | - self.assertEqual(doc['ctime'], file.mtime) |
917 | + #self.assertEqual(doc['import']['import_id'], inst.id) |
918 | + #self.assertEqual(doc['import']['batch_id'], self.batch_id) |
919 | + #self.assertEqual(doc['ctime'], file.mtime) |
920 | self.assertEqual(doc['bytes'], ch.file_size) |
921 | (content_type, leaf_hashes) = self.db.get_att(ch.id, 'leaf_hashes') |
922 | self.assertEqual(content_type, 'application/octet-stream') |
923 | @@ -813,9 +930,9 @@ |
924 | doc = self.db.get(ch.id) |
925 | schema.check_file(doc) |
926 | self.assertTrue(doc['_rev'].startswith('1-')) |
927 | - self.assertEqual(doc['import']['import_id'], import_id) |
928 | - self.assertEqual(doc['import']['batch_id'], batch_id) |
929 | - self.assertEqual(doc['ctime'], file.mtime) |
930 | + #self.assertEqual(doc['import']['import_id'], import_id) |
931 | + #self.assertEqual(doc['import']['batch_id'], batch_id) |
932 | + #self.assertEqual(doc['ctime'], file.mtime) |
933 | self.assertEqual(doc['bytes'], file.size) |
934 | (content_type, leaf_hashes) = self.db.get_att(ch.id, 'leaf_hashes') |
935 | self.assertEqual(content_type, 'application/octet-stream') |
936 | @@ -907,9 +1024,9 @@ |
937 | doc = self.db.get(ch.id) |
938 | schema.check_file(doc) |
939 | self.assertTrue(doc['_rev'].startswith('2-')) |
940 | - self.assertNotEqual(doc['import']['import_id'], import_id) |
941 | - self.assertNotEqual(doc['import']['batch_id'], batch_id) |
942 | - self.assertEqual(doc['ctime'], file.mtime) |
943 | + #self.assertNotEqual(doc['import']['import_id'], import_id) |
944 | + #self.assertNotEqual(doc['import']['batch_id'], batch_id) |
945 | + #self.assertEqual(doc['ctime'], file.mtime) |
946 | self.assertEqual(doc['bytes'], file.size) |
947 | (content_type, leaf_hashes) = self.db.get_att(ch.id, 'leaf_hashes') |
948 | self.assertEqual(content_type, 'application/octet-stream') |
949 | |
950 | === modified file 'dmedia/tests/test_local.py' |
951 | --- dmedia/tests/test_local.py 2012-07-09 18:28:44 +0000 |
952 | +++ dmedia/tests/test_local.py 2012-11-21 12:55:46 +0000 |
953 | @@ -25,6 +25,7 @@ |
954 | |
955 | from unittest import TestCase |
956 | from random import Random |
957 | +import time |
958 | |
959 | import filestore |
960 | from filestore import FileStore, DIGEST_B32LEN, DIGEST_BYTES |
961 | @@ -259,7 +260,7 @@ |
962 | self.assertEqual(cm.exception.id, ch.id) |
963 | |
964 | # When doc does exist |
965 | - doc = schema.create_file(ch.id, ch.file_size, ch.leaf_hashes, {}) |
966 | + doc = schema.create_file(time.time(), ch, {}) |
967 | inst.db.save(doc) |
968 | self.assertEqual(inst.content_hash(ch.id), unpacked) |
969 | self.assertEqual(inst.content_hash(ch.id, False), ch) |
970 | |
971 | === modified file 'dmedia/tests/test_metastore.py' |
972 | --- dmedia/tests/test_metastore.py 2012-01-28 21:43:12 +0000 |
973 | +++ dmedia/tests/test_metastore.py 2012-11-21 12:55:46 +0000 |
974 | @@ -25,11 +25,17 @@ |
975 | |
976 | from unittest import TestCase |
977 | import time |
978 | +import os |
979 | +from random import SystemRandom |
980 | |
981 | +from filestore import FileStore, DIGEST_BYTES |
982 | from microfiber import random_id |
983 | |
984 | +from dmedia.tests.base import TempDir |
985 | from dmedia import metastore |
986 | |
987 | +random = SystemRandom() |
988 | + |
989 | |
990 | class DummyStat: |
991 | def __init__(self, mtime): |
992 | @@ -255,4 +261,91 @@ |
993 | 'corrupt': {id3: 'baz', fs.id: {'time': ts}}, |
994 | } |
995 | ) |
996 | - |
997 | + |
998 | + def test_relink_iter(self): |
999 | + tmp = TempDir() |
1000 | + fs = FileStore(tmp.dir) |
1001 | + |
1002 | + def create(): |
1003 | + _id = random_id(DIGEST_BYTES) |
1004 | + data = b'N' * random.randint(1, 1776) |
1005 | + open(fs.path(_id), 'wb').write(data) |
1006 | + st = fs.stat(_id) |
1007 | + assert st.size == len(data) |
1008 | + return st |
1009 | + |
1010 | + # Test when empty |
1011 | + self.assertEqual( |
1012 | + list(metastore.relink_iter(fs)), |
1013 | + [] |
1014 | + ) |
1015 | + |
1016 | + # Test with only 1 |
1017 | + items = [create()] |
1018 | + self.assertEqual( |
1019 | + list(metastore.relink_iter(fs)), |
1020 | + [items] |
1021 | + ) |
1022 | + |
1023 | + # Test with 25 |
1024 | + items.extend(create() for i in range(24)) |
1025 | + assert len(items) == 25 |
1026 | + items.sort(key=lambda st: st.id) |
1027 | + self.assertEqual( |
1028 | + list(metastore.relink_iter(fs)), |
1029 | + [items] |
1030 | + ) |
1031 | + |
1032 | + # Test with 26 |
1033 | + items.append(create()) |
1034 | + assert len(items) == 26 |
1035 | + items.sort(key=lambda st: st.id) |
1036 | + self.assertEqual( |
1037 | + list(metastore.relink_iter(fs)), |
1038 | + [ |
1039 | + items[:25], |
1040 | + items[25:], |
1041 | + ] |
1042 | + ) |
1043 | + |
1044 | + # Test with 49 |
1045 | + items.extend(create() for i in range(23)) |
1046 | + assert len(items) == 49 |
1047 | + items.sort(key=lambda st: st.id) |
1048 | + self.assertEqual( |
1049 | + list(metastore.relink_iter(fs)), |
1050 | + [ |
1051 | + items[:25], |
1052 | + items[25:], |
1053 | + ] |
1054 | + ) |
1055 | + |
1056 | + # Test with 100 |
1057 | + items.extend(create() for i in range(51)) |
1058 | + assert len(items) == 100 |
1059 | + items.sort(key=lambda st: st.id) |
1060 | + self.assertEqual( |
1061 | + list(metastore.relink_iter(fs)), |
1062 | + [ |
1063 | + items[0:25], |
1064 | + items[25:50], |
1065 | + items[50:75], |
1066 | + items[75:100], |
1067 | + ] |
1068 | + ) |
1069 | + |
1070 | + # Test with 118 |
1071 | + items.extend(create() for i in range(18)) |
1072 | + assert len(items) == 118 |
1073 | + items.sort(key=lambda st: st.id) |
1074 | + self.assertEqual( |
1075 | + list(metastore.relink_iter(fs)), |
1076 | + [ |
1077 | + items[0:25], |
1078 | + items[25:50], |
1079 | + items[50:75], |
1080 | + items[75:100], |
1081 | + items[100:118], |
1082 | + ] |
1083 | + ) |
1084 | + |
1085 | |
1086 | === modified file 'dmedia/tests/test_schema.py' |
1087 | --- dmedia/tests/test_schema.py 2012-11-14 00:37:47 +0000 |
1088 | +++ dmedia/tests/test_schema.py 2012-11-21 12:55:46 +0000 |
1089 | @@ -30,7 +30,7 @@ |
1090 | from copy import deepcopy |
1091 | import time |
1092 | |
1093 | -from filestore import TYPE_ERROR, DIGEST_BYTES |
1094 | +from filestore import TYPE_ERROR, DIGEST_BYTES, ContentHash |
1095 | from microfiber import random_id |
1096 | |
1097 | from .base import TempDir |
1098 | @@ -331,6 +331,14 @@ |
1099 | str(cm.exception), |
1100 | "doc['stored']['MZZG2ZDSOQVSW2TEMVZG643F']['verified'] must be >= 0; got -1" |
1101 | ) |
1102 | + bad = deepcopy(good) |
1103 | + bad['stored']['MZZG2ZDSOQVSW2TEMVZG643F']['verified'] = 123.0 |
1104 | + with self.assertRaises(TypeError) as cm: |
1105 | + f(bad) |
1106 | + self.assertEqual( |
1107 | + str(cm.exception), |
1108 | + "doc['stored']['MZZG2ZDSOQVSW2TEMVZG643F']['verified']: need a <class 'int'>; got a <class 'float'>: 123.0" |
1109 | + ) |
1110 | |
1111 | # Test with invalid stored "pinned": |
1112 | bad = deepcopy(good) |
1113 | @@ -362,37 +370,6 @@ |
1114 | "doc['corrupt'] cannot be empty; got {}" |
1115 | ) |
1116 | |
1117 | - # ext |
1118 | - copy = deepcopy(good) |
1119 | - copy['ext'] = 'ogv' |
1120 | - self.assertIsNone(f(copy)) |
1121 | - copy['ext'] = 42 |
1122 | - with self.assertRaises(TypeError) as cm: |
1123 | - f(copy) |
1124 | - self.assertEqual( |
1125 | - str(cm.exception), |
1126 | - TYPE_ERROR.format("doc['ext']", str, int, 42) |
1127 | - ) |
1128 | - copy['ext'] = '.mov' |
1129 | - with self.assertRaises(ValueError) as cm: |
1130 | - f(copy) |
1131 | - self.assertEqual( |
1132 | - str(cm.exception), |
1133 | - "doc['ext']: '.mov' does not match '^[a-z0-9]+(\\\\.[a-z0-9]+)?$'" |
1134 | - ) |
1135 | - |
1136 | - # content_type |
1137 | - copy = deepcopy(good) |
1138 | - copy['content_type'] = 'video/quicktime' |
1139 | - self.assertIsNone(f(copy)) |
1140 | - copy['content_type'] = 42 |
1141 | - with self.assertRaises(TypeError) as cm: |
1142 | - f(copy) |
1143 | - self.assertEqual( |
1144 | - str(cm.exception), |
1145 | - TYPE_ERROR.format("doc['content_type']", str, int, 42) |
1146 | - ) |
1147 | - |
1148 | # proxy_of |
1149 | copy = deepcopy(good) |
1150 | copy['origin'] = 'proxy' |
1151 | @@ -571,13 +548,15 @@ |
1152 | ) |
1153 | |
1154 | def test_create_file(self): |
1155 | + timestamp = time.time() |
1156 | _id = random_id(DIGEST_BYTES) |
1157 | + file_size = 31415 |
1158 | leaf_hashes = os.urandom(DIGEST_BYTES) |
1159 | - file_size = 31415 |
1160 | + ch = ContentHash(_id, file_size, leaf_hashes) |
1161 | store_id = random_id() |
1162 | stored = {store_id: {'copies': 2, 'mtime': 1234567890}} |
1163 | |
1164 | - doc = schema.create_file(_id, file_size, leaf_hashes, stored) |
1165 | + doc = schema.create_file(timestamp, ch, stored) |
1166 | schema.check_file(doc) |
1167 | self.assertEqual( |
1168 | set(doc), |
1169 | @@ -603,8 +582,8 @@ |
1170 | } |
1171 | ) |
1172 | self.assertEqual(doc['type'], 'dmedia/file') |
1173 | - self.assertLessEqual(doc['time'], time.time()) |
1174 | - self.assertEqual(doc['atime'], int(doc['time'])) |
1175 | + self.assertEqual(doc['time'], timestamp) |
1176 | + self.assertEqual(doc['atime'], int(timestamp)) |
1177 | self.assertEqual(doc['bytes'], file_size) |
1178 | self.assertEqual(doc['origin'], 'user') |
1179 | self.assertIs(doc['stored'], stored) |
1180 | @@ -616,9 +595,7 @@ |
1181 | self.assertEqual(s[store_id]['copies'], 2) |
1182 | self.assertEqual(s[store_id]['mtime'], 1234567890) |
1183 | |
1184 | - doc = schema.create_file(_id, file_size, leaf_hashes, stored, |
1185 | - origin='proxy' |
1186 | - ) |
1187 | + doc = schema.create_file(timestamp, ch, stored, origin='proxy') |
1188 | doc['proxy_of'] = random_id(DIGEST_BYTES) |
1189 | schema.check_file(doc) |
1190 | self.assertEqual(doc['origin'], 'proxy') |
1191 | |
1192 | === modified file 'dmedia/tests/test_server.py' |
1193 | --- dmedia/tests/test_server.py 2012-10-24 23:23:30 +0000 |
1194 | +++ dmedia/tests/test_server.py 2012-11-21 12:55:46 +0000 |
1195 | @@ -271,6 +271,7 @@ |
1196 | environ = { |
1197 | 'REQUEST_METHOD': 'GET', |
1198 | 'PATH_INFO': '/_config/foo', |
1199 | + 'QUERY_STRING': '', |
1200 | 'wsgi.input': Input(None, {'REQUEST_METHOD': 'GET'}), |
1201 | } |
1202 | with self.assertRaises(WSGIError) as cm: |
1203 | @@ -603,7 +604,7 @@ |
1204 | docs = [{'_id': random_id()} for i in range(100)] |
1205 | for doc in docs: |
1206 | doc['_rev'] = s1.post(doc, name1)['rev'] |
1207 | - time.sleep(0.5) |
1208 | + time.sleep(1) |
1209 | for doc in docs: |
1210 | self.assertEqual(s2.get(name2, doc['_id']), doc) |
1211 | |
1212 | |
1213 | === modified file 'dmedia/tests/test_transfers.py' |
1214 | --- dmedia/tests/test_transfers.py 2011-10-17 22:50:22 +0000 |
1215 | +++ dmedia/tests/test_transfers.py 2012-11-21 12:55:46 +0000 |
1216 | @@ -60,10 +60,10 @@ |
1217 | assert chunk is not None |
1218 | assert self._chunk is None |
1219 | self._chunk = chunk |
1220 | - |
1221 | + |
1222 | |
1223 | def create_file_doc(ch, store_id): |
1224 | - return schema.create_file(ch.id, ch.file_size, ch.leaf_hashes, |
1225 | + return schema.create_file(time.time(), ch, |
1226 | {store_id: {'mtime': 123456789, 'copies': 1}} |
1227 | ) |
1228 | |
1229 | |
1230 | === modified file 'dmedia/tests/test_verification.py' |
1231 | --- dmedia/tests/test_verification.py 2012-08-07 00:14:58 +0000 |
1232 | +++ dmedia/tests/test_verification.py 2012-11-21 12:55:46 +0000 |
1233 | @@ -27,7 +27,7 @@ |
1234 | import time |
1235 | from os import path |
1236 | |
1237 | -from filestore import FileStore, DIGEST_BYTES |
1238 | +from filestore import FileStore, DIGEST_BYTES, ContentHash |
1239 | from microfiber import random_id |
1240 | |
1241 | from .couch import CouchCase |
1242 | @@ -68,7 +68,7 @@ |
1243 | stored = { |
1244 | fs.id: {'mtime': fs.stat(ch.id).mtime, 'copies': fs.copies} |
1245 | } |
1246 | - doc = create_file(ch.id, ch.file_size, ch.leaf_hashes, stored) |
1247 | + doc = create_file(time.time(), ch, stored) |
1248 | self.db.save(doc) |
1249 | good.append(ch.id) |
1250 | |
1251 | @@ -81,7 +81,8 @@ |
1252 | stored = { |
1253 | fs.id: {'mtime': fs.stat(_id).mtime, 'copies': fs.copies} |
1254 | } |
1255 | - doc = create_file(_id, ch.file_size, ch.leaf_hashes, stored) |
1256 | + ch = ContentHash(_id, ch.file_size, ch.leaf_hashes) |
1257 | + doc = create_file(time.time(), ch, stored) |
1258 | self.db.save(doc) |
1259 | bad.append(_id) |
1260 | |
1261 | @@ -94,7 +95,7 @@ |
1262 | stored = { |
1263 | fs.id: {'mtime': path.getmtime(fs.path(ch.id)), 'copies': fs.copies} |
1264 | } |
1265 | - doc = create_file(ch.id, ch.file_size, ch.leaf_hashes, stored) |
1266 | + doc = create_file(time.time(), ch, stored) |
1267 | self.db.save(doc) |
1268 | empty.append(ch.id) |
1269 | |
1270 | @@ -105,7 +106,7 @@ |
1271 | stored = { |
1272 | fs.id: {'mtime': path.getmtime(file.name), 'copies': fs.copies} |
1273 | } |
1274 | - doc = create_file(ch.id, ch.file_size, ch.leaf_hashes, stored) |
1275 | + doc = create_file(time.time(), ch, stored) |
1276 | self.db.save(doc) |
1277 | missing.append(ch.id) |
1278 | |
1279 | @@ -123,9 +124,9 @@ |
1280 | set(['copies', 'mtime', 'verified']) |
1281 | ) |
1282 | verified = doc['stored'][fs.id]['verified'] |
1283 | - self.assertIsInstance(verified, (int, float)) |
1284 | - self.assertLessEqual(start, verified) |
1285 | - self.assertLessEqual(verified, end) |
1286 | + self.assertIsInstance(verified, int) |
1287 | + self.assertLessEqual(int(start), verified) |
1288 | + self.assertLessEqual(verified, int(end)) |
1289 | self.assertNotIn('corrupt', doc) |
1290 | for _id in bad: |
1291 | doc = self.db.get(_id) |
1292 | |
1293 | === modified file 'dmedia/verification.py' |
1294 | --- dmedia/verification.py 2012-08-07 00:14:58 +0000 |
1295 | +++ dmedia/verification.py 2012-11-21 12:55:46 +0000 |
1296 | @@ -84,7 +84,7 @@ |
1297 | doc['stored'][fs.id] = { |
1298 | 'copies': fs.copies, |
1299 | 'mtime': fs.stat(_id).mtime, |
1300 | - 'verified': time.time(), |
1301 | + 'verified': int(time.time()), |
1302 | } |
1303 | except CorruptFile: |
1304 | mark_corrupt(doc, fs) |
1305 | |
1306 | === modified file 'dmedia/views.py' |
1307 | --- dmedia/views.py 2012-11-02 06:13:47 +0000 |
1308 | +++ dmedia/views.py 2012-11-21 12:55:46 +0000 |
1309 | @@ -500,7 +500,6 @@ |
1310 | file_design, |
1311 | project_design, |
1312 | job_design, |
1313 | - user_design, |
1314 | ) |
1315 | |
1316 |
Approved!