Merge lp:~jderose/dmedia/2-core-bringup into lp:dmedia

Proposed by Jason Gerard DeRose
Status: Merged
Merged at revision: 223
Proposed branch: lp:~jderose/dmedia/2-core-bringup
Merge into: lp:dmedia
Diff against target: 9294 lines (+2511/-5127)
26 files modified
debian/changelog (+2/-2)
debian/control (+6/-4)
debian/rules (+8/-1)
dmedia/client.py (+382/-0)
dmedia/core.py (+8/-10)
dmedia/errors.py (+0/-74)
dmedia/extractor.py (+15/-18)
dmedia/filestore.py (+0/-1163)
dmedia/importer.py (+146/-388)
dmedia/schema.py (+225/-368)
dmedia/tests/base.py (+164/-0)
dmedia/tests/couch.py (+6/-9)
dmedia/tests/helpers.py (+10/-13)
dmedia/tests/test_client.py (+259/-0)
dmedia/tests/test_core.py (+31/-24)
dmedia/tests/test_extractor.py (+249/-265)
dmedia/tests/test_filestore.py (+28/-1316)
dmedia/tests/test_importer.py (+474/-878)
dmedia/tests/test_schema.py (+190/-225)
dmedia/tests/test_transfers.py (+118/-113)
dmedia/transfers.py (+26/-40)
dmedia/workers.py (+1/-1)
misc/hashbench.py (+24/-51)
misc/hashbench2.py (+0/-119)
misc/udisks-test.py (+138/-44)
setup.py (+1/-1)
To merge this branch: bzr merge lp:~jderose/dmedia/2-core-bringup
Reviewer: dmedia Dev
Status: Pending
Review via email: mp+76565@code.launchpad.net

Description of the change

Boy, if you thought the last merge proposal was big, just wait. This merge:

1) Ports the dmedia core to Python3, hooray!

2) Guts the old internal filestore and ports to the new Skein-based filestore, hooray! (There's a short usage sketch of the new API after this list.)

3) Revamps ImportWorker to take advantage of batch_import_iter() from the new filestore

4) Some schema tweaks, the most important of which is changing 'time' in the stored dict to 'mtime', and having this be the mtime from the file-system containing that store... important for quick reality checks so we can decide whether full verification of any files is urgently called for (see the sketch after this list)

5) Starts work on an improved dmedia HTTP client, which will talk to the native dmedia HTTP server (native as opposed to a remote service like UbuntuOne or S3); there's a usage sketch after this list

6) Is really really really big!
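
For anyone reviewing, here's a minimal sketch of the new filestore API as it's used in this branch. The calls are the ones visible in the diff below (FileStore(parentdir), fs.path(file_id)); the store doc fragment and file ID are hypothetical placeholders, and batch_import_iter() itself lives in python3-filestore so it isn't shown:

    from os import path
    from filestore import FileStore

    store = {'parentdir': '/media/MyDrive'}  # hypothetical store doc fragment
    file_id = 'ROHNRBKS...'                  # placeholder (a real base32 ID in practice)

    # One FileStore per parentdir; note the doc key is now 'parentdir',
    # not 'path' (see the core.py hunks below):
    fs = FileStore(store['parentdir'])

    # The canonical filename is derived from the content-hash ID alone;
    # the old per-file 'ext' argument is gone:
    filename = fs.path(file_id)
    if path.isfile(filename):
        print('file is present in this store')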
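
And to make the 'mtime' tweak in point 4 concrete, this is the kind of quick reality check it enables. A sketch only, not code from the branch, and the {'mtime': ...} shape of the per-store entry in doc['stored'] is an assumption here:

    import os

    def needs_urgent_verification(fs, file_id, stored_entry):
        # stored_entry is the per-store entry from doc['stored'];
        # assumed shape: {'mtime': ...}. If the mtime recorded at
        # import/verify time no longer matches the file-system's current
        # mtime, the file may have changed behind our back, so a full
        # content-hash verification is urgently called for.
        current = os.stat(fs.path(file_id)).st_mtime
        return current != stored_entry['mtime']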
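
Finally, a usage sketch of the new HTTP client for point 5, pieced together from dmedia/client.py below. It assumes you already have a filestore ContentHash `ch` (with leaf_hashes unpacked) and a FileStore `store`; the server URL is made up:

    from dmedia.client import HTTPClient, DownloadWriter, threaded_response_iter

    client = HTTPClient('http://192.168.1.42:8000/')  # hypothetical native dmedia server
    writer = DownloadWriter(ch, store)  # allocates (or resumes) the partial file

    while writer.missing:
        (start, stop) = writer.next_slice()     # next contiguous run of missing leaves
        response = client.get(ch, start, stop)  # leaf-wise HTTP Range request
        for leaf in threaded_response_iter(response, start):
            writer.write_leaf(leaf)             # checks each leaf hash as it lands

    writer.finish()  # full verify_and_move() into the canonical location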


Preview Diff

1=== modified file 'debian/changelog'
2--- debian/changelog 2011-08-24 01:23:18 +0000
3+++ debian/changelog 2011-09-22 11:43:29 +0000
4@@ -1,5 +1,5 @@
5-dmedia (11.09.0-0~natty) natty; urgency=low
6+dmedia (11.09.0-0~oneiric) oneiric; urgency=low
7
8 * Upstream 11.09.0 pre-release
9
10- -- Jason Gerard DeRose <jderose@novacut.com> Wed, 24 Aug 2011 01:17:38 +0000
11+ -- Jason Gerard DeRose <jderose@novacut.com> Thu, 22 Sep 2011 10:49:07 +0000
12
13=== modified file 'debian/control'
14--- debian/control 2011-09-16 05:32:07 +0000
15+++ debian/control 2011-09-22 11:43:29 +0000
16@@ -2,16 +2,18 @@
17 Section: python
18 Priority: optional
19 Maintainer: Jason Gerard DeRose <jderose@novacut.com>
20-Build-Depends: debhelper (>= 8.9), python (>= 2.7)
21+Build-Depends: debhelper (>= 8.9), python3 (>= 3.2)
22 Standards-Version: 3.9.2
23-X-Python-Version: 2.7
24+X-Python-Version: 3.2
25+X-Python3-Version: 3.2
26 Homepage: https://launchpad.net/dmedia
27
28 Package: dmedia
29 Architecture: all
30-Depends: ${misc:Depends}, python (>= 2.7),
31+Depends: ${misc:Depends}, python3 (>= 3.2),
32+ python3-filestore (>= 11.09),
33+ python3-microfiber (>= 11.09),
34 dc3 (>= 11.09),
35- python-microfiber (>= 11.09),
36 Description: distributed media library
37 A user-experienced-focused technology aimed at making file management go away
38 for both content-creation and content-consumption.
39
40=== modified file 'debian/rules'
41--- debian/rules 2011-08-27 14:14:43 +0000
42+++ debian/rules 2011-09-22 11:43:29 +0000
43@@ -1,3 +1,10 @@
44 #!/usr/bin/make -f
45 %:
46- dh $@ --with=python2
47+ dh $@ --with=python3
48+
49+override_dh_auto_install:
50+ for pyvers in $(shell py3versions -vr); do \
51+ python$$pyvers setup.py install \
52+ --install-layout=deb \
53+ --root $(CURDIR)/debian/dmedia; \
54+ done
55
56=== added file 'dmedia/client.py'
57--- dmedia/client.py 1970-01-01 00:00:00 +0000
58+++ dmedia/client.py 2011-09-22 11:43:29 +0000
59@@ -0,0 +1,382 @@
60+# dmedia: dmedia hashing protocol and file layout
61+# Copyright (C) 2011 Novacut Inc
62+#
63+# This file is part of `dmedia`.
64+#
65+# `dmedia` is free software: you can redistribute it and/or modify it under
66+# the terms of the GNU Affero General Public License as published by the Free
67+# Software Foundation, either version 3 of the License, or (at your option) any
68+# later version.
69+#
70+# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
71+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
72+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
73+# details.
74+#
75+# You should have received a copy of the GNU Affero General Public License along
76+# with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
77+#
78+# Authors:
79+# Jason Gerard DeRose <jderose@novacut.com>
80+
81+"""
82+dmedia HTTP client.
83+"""
84+
85+import os
86+from urllib.parse import urlparse
87+from http.client import HTTPConnection, HTTPSConnection
88+from collections import OrderedDict
89+
90+from filestore import LEAF_SIZE, TYPE_ERROR, hash_leaf, reader_iter
91+from filestore import Leaf, ContentHash, SmartQueue, _start_thread
92+
93+from dmedia import __version__
94+
95+
96+USER_AGENT = 'dmedia {}'.format(__version__)
97+
98+
99+class HTTPError(Exception):
100+ """
101+ Base class for custom HTTP client exceptions.
102+ """
103+
104+ def __init__(self, response, method, path):
105+ self.response = response
106+ self.method = method
107+ self.path = path
108+ self.data = response.read()
109+ super().__init__(
110+ '{} {}: {} {}'.format(response.status, response.reason, method, path)
111+ )
112+
113+
114+class ClientError(HTTPError):
115+ """
116+ Base class for all 4xx Client Error exceptions.
117+ """
118+
119+
120+class BadRequest(ClientError):
121+ """
122+ 400 Bad Request.
123+ """
124+
125+
126+class Unauthorized(ClientError):
127+ """
128+ 401 Unauthorized.
129+ """
130+
131+
132+class Forbidden(ClientError):
133+ """
134+ 403 Forbidden.
135+ """
136+
137+
138+class NotFound(ClientError):
139+ """
140+ 404 Not Found.
141+ """
142+
143+
144+class MethodNotAllowed(ClientError):
145+ """
146+ 405 Method Not Allowed.
147+ """
148+
149+
150+class NotAcceptable(ClientError):
151+ """
152+ 406 Not Acceptable.
153+ """
154+
155+
156+class Conflict(ClientError):
157+ """
158+ 409 Conflict.
159+
160+ Raised when the request resulted in an update conflict.
161+ """
162+
163+
164+class PreconditionFailed(ClientError):
165+ """
166+ 412 Precondition Failed.
167+ """
168+
169+
170+class BadContentType(ClientError):
171+ """
172+ 415 Unsupported Media Type.
173+ """
174+
175+
176+class BadRangeRequest(ClientError):
177+ """
178+ 416 Requested Range Not Satisfiable.
179+ """
180+
181+
182+class ExpectationFailed(ClientError):
183+ """
184+ 417 Expectation Failed.
185+
186+ Raised when a bulk operation failed.
187+ """
188+
189+
190+class ServerError(HTTPError):
191+ """
192+ Used to raise exceptions for any 5xx Server Errors.
193+ """
194+
195+
196+errors = {
197+ 400: BadRequest,
198+ 401: Unauthorized,
199+ 403: Forbidden,
200+ 404: NotFound,
201+ 405: MethodNotAllowed,
202+ 406: NotAcceptable,
203+ 409: Conflict,
204+ 412: PreconditionFailed,
205+ 415: BadContentType,
206+ 416: BadRangeRequest,
207+ 417: ExpectationFailed,
208+}
209+
210+
211+def http_conn(url, **options):
212+ """
213+ Return (connection, parsed) tuple.
214+
215+ For example:
216+
217+ >>> (conn, parsed) = http_conn('http://foo.s3.amazonaws.com/')
218+
219+ The returned connection will be either an ``HTTPConnection`` or
220+ ``HTTPSConnection`` instance based on the *url* scheme.
221+
222+ The 2nd item in the returned tuple will be *url* parsed with ``urlparse()``.
223+ """
224+ u = urlparse(url)
225+ if u.scheme not in ('http', 'https'):
226+ raise ValueError('url scheme must be http or https: {!r}'.format(url))
227+ if not u.netloc:
228+ raise ValueError('bad url: {!r}'.format(url))
229+ klass = (HTTPConnection if u.scheme == 'http' else HTTPSConnection)
230+ conn = klass(u.netloc, **options)
231+ return (conn, u)
232+
233+
234+def bytes_range(start, stop=None):
235+ """
236+ Convert from Python slice semantics to an HTTP Range request.
237+
238+ Python slice semantics are quite natural to deal with, whereas the HTTP
239+ Range semantics are a touch wacky, so this function will help prevent silly
240+ errors.
241+
242+ For example, say we're requesting parts of a 10,000 byte long file. This
243+ requests the first 500 bytes:
244+
245+ >>> bytes_range(0, 500)
246+ 'bytes=0-499'
247+
248+ This requests the second 500 bytes:
249+
250+ >>> bytes_range(500, 1000)
251+ 'bytes=500-999'
252+
253+ All three of these request the final 500 bytes:
254+
255+ >>> bytes_range(9500, 10000)
256+ 'bytes=9500-9999'
257+ >>> bytes_range(-500)
258+ 'bytes=-500'
259+ >>> bytes_range(9500)
260+ 'bytes=9500-'
261+
262+ For details on HTTP Range header, see:
263+
264+ http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
265+ """
266+ if start < 0:
267+ assert stop is None
268+ return 'bytes={}'.format(start)
269+ end = ('' if stop is None else stop - 1)
270+ return 'bytes={}-{}'.format(start, end)
271+
272+
273+def check_slice(ch, start, stop):
274+ """
275+ Validate the crap out of a leaf-wise slice of a file.
276+ """
277+ if not isinstance(ch, ContentHash):
278+ raise TypeError(
279+ TYPE_ERROR.format('ch', ContentHash, type(ch), ch)
280+ )
281+ if not isinstance(ch.leaf_hashes, tuple):
282+ raise TypeError(
283+ 'ch.leaf_hashes not unpacked for ch.id={}'.format(ch.id)
284+ )
285+ if not ch.leaf_hashes:
286+ raise ValueError('got empty ch.leaf_hashes for ch.id={}'.format(ch.id))
287+ if not isinstance(start, int):
288+ raise TypeError(
289+ TYPE_ERROR.format('start', int, type(start), start)
290+ )
291+ if not (stop is None or isinstance(stop, int)):
292+ raise TypeError(
293+ TYPE_ERROR.format('stop', int, type(stop), stop)
294+ )
295+ if not (0 <= start < len(ch.leaf_hashes)):
296+ raise ValueError('Need 0 <= start < {}; got start={}'.format(
297+ len(ch.leaf_hashes), start)
298+ )
299+ if not (stop is None or 1 <= stop <= len(ch.leaf_hashes)):
300+ raise ValueError('Need 1 <= stop <= {}; got stop={}'.format(
301+ len(ch.leaf_hashes), stop)
302+ )
303+ if not (stop is None or start < stop):
304+ raise ValueError(
305+ 'Need start < stop; got start={}, stop={}'.format(start, stop)
306+ )
307+
308+
309+def range_header(ch, start=0, stop=None):
310+ check_slice(ch, start, stop)
311+ if start == 0 and (stop is None or stop == len(ch.leaf_hashes)):
312+ return {}
313+ _start = start * LEAF_SIZE
314+ if stop is None or stop == len(ch.leaf_hashes):
315+ _stop = None
316+ else:
317+ _stop = stop * LEAF_SIZE
318+ return {'Range': bytes_range(_start, _stop)}
319+
320+
321+def response_reader(response, queue, start=0):
322+ try:
323+ index = start
324+ while True:
325+ data = response.read(LEAF_SIZE)
326+ if not data:
327+ queue.put(None)
328+ break
329+ queue.put(Leaf(index, data))
330+ index += 1
331+ except Exception as e:
332+ queue.put(e)
333+
334+
335+def threaded_response_iter(response, start=0):
336+ q = SmartQueue(4)
337+ thread = _start_thread(response_reader, response, q, start)
338+ while True:
339+ leaf = q.get()
340+ if leaf is None:
341+ break
342+ yield leaf
343+ thread.join() # Make sure reader() terminates
344+
345+
346+def response_iter(response, start=0):
347+ index = start
348+ while True:
349+ data = response.read(LEAF_SIZE)
350+ if not data:
351+ break
352+ yield Leaf(index, data)
353+ index += 1
354+
355+
356+def missing_leaves(ch, tmp_fp):
357+ assert isinstance(ch.leaf_hashes, tuple)
358+ assert os.fstat(tmp_fp.fileno()).st_size == ch.file_size
359+ assert tmp_fp.mode in ('rb+', 'r+b')
360+ tmp_fp.seek(0)
361+ for leaf in reader_iter(tmp_fp):
362+ leaf_hash = ch.leaf_hashes[leaf.index]
363+ if hash_leaf(leaf.index, leaf.data) != leaf_hash:
364+ yield (leaf.index, leaf_hash)
365+ assert leaf.index == len(ch.leaf_hashes) - 1
366+
367+
368+class DownloadWriter:
369+ def __init__(self, ch, store):
370+ self.ch = ch
371+ self.store = store
372+ self.tmp_fp = store.allocate_partial(ch.file_size, ch.id)
373+ self.resumed = (self.tmp_fp.mode != 'wb')
374+ if self.resumed:
375+ gen = missing_leaves(ch, self.tmp_fp)
376+ else:
377+ gen = enumerate(ch.leaf_hashes)
378+ self.missing = OrderedDict(gen)
379+
380+ def write_leaf(self, leaf):
381+ if hash_leaf(leaf.index, leaf.data) != self.ch.leaf_hashes[leaf.index]:
382+ return False
383+ self.tmp_fp.seek(leaf.index * LEAF_SIZE)
384+ self.tmp_fp.write(leaf.data)
385+ lh = self.missing.pop(leaf.index)
386+ assert lh == self.ch.leaf_hashes[leaf.index]
387+ return True
388+
389+ def next_slice(self):
390+ if not self.missing:
391+ raise Exception('done!')
392+ first = None
393+ for i in self.missing:
394+ if first is None:
395+ first = i
396+ last = i
397+ elif i != last + 1:
398+ return (first, last + 1)
399+ else:
400+ last = i
401+ return (first, last + 1)
402+
403+ def finish(self):
404+ assert not self.missing
405+ self.tmp_fp.close()
406+ tmp_fp = open(self.tmp_fp.name, 'rb')
407+ return self.store.verify_and_move(tmp_fp, self.ch.id)
408+
409+
410+class HTTPClient:
411+ def __init__(self, url, debug=False):
412+ (self.conn, u) = http_conn(url)
413+ self.basepath = (u.path if u.path.endswith('/') else u.path + '/')
414+ self.url = ''.join([u.scheme, '://', u.netloc, self.basepath])
415+ self.u = u
416+ if debug:
417+ self.conn.set_debuglevel(1)
418+
419+ def request(self, method, relpath, body=None, headers=None):
420+ assert not relpath.startswith('/')
421+ path = self.basepath + relpath
422+ h = {'User-Agent': USER_AGENT}
423+ if headers:
424+ h.update(headers)
425+ try:
426+ self.conn.request(method, path, body, h)
427+ response = self.conn.getresponse()
428+ except Exception as e:
429+ self.conn.close()
430+ raise e
431+ if response.status >= 500:
432+ raise ServerError(response, method, path)
433+ if response.status >= 400:
434+ E = errors.get(response.status, ClientError)
435+ raise E(response, method, path)
436+ return response
437+
438+ def get(self, ch, start=0, stop=None):
439+ headers = range_header(ch, start, stop)
440+ return self.request('GET', ch.id, headers=headers)
441+
442
443=== modified file 'dmedia/core.py'
444--- dmedia/core.py 2011-09-15 11:41:48 +0000
445+++ dmedia/core.py 2011-09-22 11:43:29 +0000
446@@ -56,8 +56,8 @@
447 import json
448
449 from microfiber import Database, NotFound, Conflict
450+from filestore import FileStore
451
452-from .filestore import FileStore
453 from .constants import DBNAME
454 from .transfers import TransferManager
455 from .schema import random_id, create_machine, create_store
456@@ -126,8 +126,8 @@
457 if not self.local['filestores']:
458 self.add_filestore(self.home)
459 else:
460- for (parentdir, store) in self.local['filestores'].iteritems():
461- assert store['path'] == parentdir
462+ for (parentdir, store) in self.local['filestores'].items():
463+ assert store['parentdir'] == parentdir
464 try:
465 self.init_filestore(store)
466 except Exception:
467@@ -137,11 +137,11 @@
468 except Conflict:
469 pass
470 if self.local.get('default_filestore') not in self.local['filestores']:
471- self.local['default_filestore'] = store['path']
472+ self.local['default_filestore'] = store['parentdir']
473 return self.local['filestores'][self.local['default_filestore']]
474
475 def init_filestore(self, store):
476- parentdir = store['path']
477+ parentdir = store['parentdir']
478 self._filestores[parentdir] = FileStore(parentdir)
479
480 def add_filestore(self, parentdir):
481@@ -156,16 +156,14 @@
482 store = create_store(parentdir, self.machine_id)
483 self.init_filestore(store)
484 self.local['filestores'][parentdir] = deepcopy(store)
485- self.local['default_filestore'] = store['path']
486+ self.local['default_filestore'] = store['parentdir']
487 self.db.save(self.local)
488 self.db.save(store)
489 return store
490
491 def get_file(self, file_id):
492- doc = self.db.get(file_id)
493- ext = doc.get('ext')
494- for fs in self._filestores.itervalues():
495- filename = fs.path(file_id, ext)
496+ for fs in self._filestores.values():
497+ filename = fs.path(file_id)
498 if path.isfile(filename):
499 return filename
500
501
502=== removed file 'dmedia/errors.py'
503--- dmedia/errors.py 2011-04-20 08:13:48 +0000
504+++ dmedia/errors.py 1970-01-01 00:00:00 +0000
505@@ -1,74 +0,0 @@
506-# Authors:
507-# Jason Gerard DeRose <jderose@novacut.com>
508-#
509-# dmedia: distributed media library
510-# Copyright (C) 2010 Jason Gerard DeRose <jderose@novacut.com>
511-#
512-# This file is part of `dmedia`.
513-#
514-# `dmedia` is free software: you can redistribute it and/or modify it under the
515-# terms of the GNU Affero General Public License as published by the Free
516-# Software Foundation, either version 3 of the License, or (at your option) any
517-# later version.
518-#
519-# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
520-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
521-# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
522-# details.
523-#
524-# You should have received a copy of the GNU Affero General Public License along
525-# with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
526-
527-"""
528-Custom exceptions
529-"""
530-
531-class DmediaError(StandardError):
532- """
533- Base class for all custom dmedia exceptions.
534- """
535-
536- _format = ''
537-
538- def __init__(self, **kw):
539- self._kw = kw
540- for (key, value) in kw.iteritems():
541- assert not hasattr(self, key), 'conflicting kwarg %s.%s = %r' % (
542- self.__class__.__name__, key, value,
543- )
544- setattr(self, key, value)
545- super(DmediaError, self).__init__(self._format % kw)
546-
547-
548-class AmbiguousPath(DmediaError):
549- _format = '%(pathname)r resolves to %(abspath)r'
550-
551-
552-class FileStoreTraversal(DmediaError):
553- """
554- Raised when what should be internal path traverses out of FileStore base.
555-
556- For example:
557-
558- >>> raise FileStoreTraversal(abspath='/foo/barNone/baz', base='/foo/bar')
559- Traceback (most recent call last):
560- ...
561- FileStoreTraversal: '/foo/barNone/baz' outside base '/foo/bar'
562- """
563- _format = '%(abspath)r outside base %(base)r'
564-
565-
566-class DuplicateFile(DmediaError):
567- _format = 'chash=%(chash)r, src=%(src)r, dst=%(dst)r'
568-
569-
570-class DownloadFailure(DmediaError):
571- _format = 'leaf %(leaf)d expected %(expected)r; got %(got)r'
572-
573-
574-class IntegrityError(DmediaError):
575- _format = 'got chash %(got)r; expected %(expected)r for %(filename)r'
576-
577-
578-class TopHashError(DmediaError):
579- _format = 'got tophash %(got)r; expected %(expected)r (size: %(size)r bytes)'
580
581=== modified file 'dmedia/extractor.py'
582--- dmedia/extractor.py 2011-07-12 03:12:55 +0000
583+++ dmedia/extractor.py 2011-09-22 11:43:29 +0000
584@@ -25,7 +25,7 @@
585 """
586
587 from os import path
588-from subprocess import check_call, Popen, PIPE
589+from subprocess import check_call, check_output, CalledProcessError
590 import json
591 import tempfile
592 import shutil
593@@ -95,16 +95,16 @@
594 """
595 Attempt to extract EXIF metadata from file at *filename*.
596 """
597+ cmd = ['exiftool', '-j', filename]
598 try:
599- args = ['exiftool', '-j', filename]
600- (stdout, stderr) = Popen(args, stdout=PIPE).communicate()
601- exif = json.loads(stdout)[0]
602- assert isinstance(exif, dict)
603- for key in EXIFTOOL_IGNORE:
604- exif.pop(key, None)
605- return exif
606- except Exception as e:
607- return {u'Error': u'%s: %s' % (e.__class__.__name__, e)}
608+ output = check_output(cmd)
609+ except CalledProcessError:
610+ return {}
611+ exif = json.loads(output.decode('utf-8'))[0]
612+ assert isinstance(exif, dict)
613+ for key in EXIFTOOL_IGNORE:
614+ exif.pop(key, None)
615+ return exif
616
617
618 def parse_subsec_datetime(string):
619@@ -119,7 +119,7 @@
620 >>> parse_subsec_datetime('2010:10:21 01:44:37')
621 1287625477.0
622 """
623- if not isinstance(string, basestring):
624+ if not isinstance(string, str):
625 return
626 parts = string.split('.')
627 if len(parts) == 1:
628@@ -165,13 +165,10 @@
629 Attempt to extract video metadata from video at *filename*.
630 """
631 try:
632- args = ['totem-video-indexer', filename]
633- popen = Popen(args, stdout=PIPE)
634- (stdout, stderr) = popen.communicate()
635- if popen.returncode != 0:
636- return {}
637+ cmd = ['totem-video-indexer', filename]
638+ output = check_output(cmd).decode('utf-8')
639 info = {}
640- for line in stdout.splitlines():
641+ for line in output.splitlines():
642 pair = line.split('=', 1)
643 if len(pair) != 2:
644 continue
645@@ -240,7 +237,7 @@
646
647 def merge_exif(src, attachments):
648 exif = extract_exif(src)
649- for (key, values) in EXIF_REMAP.iteritems():
650+ for (key, values) in EXIF_REMAP.items():
651 for v in values:
652 if v in exif:
653 yield (key, exif[v])
654
655=== removed file 'dmedia/filestore.py'
656--- dmedia/filestore.py 2011-06-15 04:46:55 +0000
657+++ dmedia/filestore.py 1970-01-01 00:00:00 +0000
658@@ -1,1163 +0,0 @@
659-# Authors:
660-# Jason Gerard DeRose <jderose@novacut.com>
661-# Akshat Jain <ssj6akshat1234@gmail.com)
662-#
663-# dmedia: distributed media library
664-# Copyright (C) 2010, 2011 Jason Gerard DeRose <jderose@novacut.com>
665-#
666-# This file is part of `dmedia`.
667-#
668-# `dmedia` is free software: you can redistribute it and/or modify it under the
669-# terms of the GNU Affero General Public License as published by the Free
670-# Software Foundation, either version 3 of the License, or (at your option) any
671-# later version.
672-#
673-# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
674-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
675-# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
676-# details.
677-#
678-# You should have received a copy of the GNU Affero General Public License along
679-# with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
680-
681-"""
682-Store files in a special layout according to their content-hash.
683-
684-The `FileStore` is the heart of dmedia. Files are assigned a canonical name
685-based on the file's content-hash, and are placed in a special layout within the
686-`FileStore` base directory.
687-
688-The files in a `FileStore` are read-only... they must be as modifying a file
689-will change its content-hash. The only way to modify a file is to copy the
690-original to a temporary file, modify it, and then place the new file into the
691-`FileStore`. This might seem like an unreasonable restriction, but it perfectly
692-captures the use case dmedia is concerned with... a distributed library of media
693-files.
694-
695-On the content-creation side, non-destructive editing is certainly the best
696-practice, especially in professional use cases. On the content consumption
697-side, modifying a file is rather rare. And the somewhat common use case --
698-modifying a file for the sake of updating metadata (say, EXIF) -- can instead be
699-accomplished by updating metadata in the corresponding CouchDB document.
700-
701-Importantly, without the read-only restriction, it would be impossible to make a
702-distributed file system whose file operations remain robust and atomic in the
703-face of arbitrary and prolonged network outages. True to its CouchDB
704-foundations, dmedia is designing with the assumption that network connectivity
705-is the exception rather than the rule.
706-
707-Please read on for the rationale of some key `FileStore` design decisions...
708-
709-
710-Design Decision: base32-encoded content-hash
711-============================================
712-
713-The `FileStore` layout was designed to allow the canonical filename to be
714-constructed from the content-hash in the simplest way possible, without
715-requiring any special decoding or encoding. For this reason, the content-hash
716-(as stored in CouchDB) is base32-encoded.
717-
718-Base32-encoding was chosen because:
719-
720- 1. It's more compact than base16/hex
721-
722- 2. It can be used to name files on case *insensitive* filesystems (whereas
723- base64-encoding cannot)
724-
725-Inside the `FileStore`, the first 2 characters of the content-hash are used for
726-the subdirectory name, and the remaining characters for the filename within that
727-subdirectory. For example:
728-
729->>> from os import path
730->>> chash = 'ZR765XWSF6S7JQHLUI4GCG5BHGPE252O'
731->>> path.join('/foo', chash[:2], chash[2:])
732-'/foo/ZR/765XWSF6S7JQHLUI4GCG5BHGPE252O'
733-
734-
735-Design Decision: canonical filenames have file extensions
736-=========================================================
737-
738-Strictly speaking, there is no technical reason to include a file extension on
739-the canonical filenames. However, there are some practical reasons that make
740-including the file extension worthwhile, despite additional complexity it adds
741-to the `FileStore` API.
742-
743-Most importantly, it allows files in a `FileStore` layout to be served with the
744-correct Content-Type by a vanilla web-server. A key design goal was to be able
745-to point, say, Apache at a dmedia `FileStore` directory have a useful dmedia
746-file server without requiring special Apache plugins for dmedia integration.
747-
748-It also provides broader software compatibility as many applications and
749-libraries do rely on the file extension for type determination. And the file
750-extension is helpful for developers, as a bit of intelligible information in
751-canonical filename will make the layout easier to explore, aid debugging.
752-
753-The current `FileStore` always includes the file extension on the canonical name
754-when the extension is provided by the calling code. However, the API is
755-designed to accommodate `FileStore` implementations that do not include the
756-file extension. The API is also designed so that the calling code isn't
757-required to provide the file extension... say, if the extension was ever removed
758-from the CouchDB schema.
759-
760-To accomplish this, files are identified by the content-hash and extension
761-together, and the extension is optional, defaulting to ``None``. This is the
762-typical calling signature:
763-
764->>> def canonical(chash, ext=None):
765-... pass
766-
767-For example:
768-
769->>> FileStore.relpath('ZR765XWSF6S7JQHLUI4GCG5BHGPE252O')
770-('ZR', '765XWSF6S7JQHLUI4GCG5BHGPE252O')
771->>> FileStore.relpath('ZR765XWSF6S7JQHLUI4GCG5BHGPE252O', 'mov')
772-('ZR', '765XWSF6S7JQHLUI4GCG5BHGPE252O.mov')
773-
774-
775-Design Decision: security good, path traversals bad
776-===================================================
777-
778-The `FileStore` is probably the most security sensitive part of dmedia in that
779-untrusted data (content-hash, file extension) is used to construct paths on the
780-filesystem. This means that the `FileStore` must be carefully designed to
781-prevent path traversal attacks (aka directory traversal attacks).
782-
783-Two lines of defense are used. First, the content-hash and file extension are
784-validated with the following functions:
785-
786- * `safe_b32()` - validates the content-hash
787-
788- * `safe_ext()` - validates the file extension
789-
790-Second, there are methods that ensure that paths constructed relative to the
791-`FileStore` base directory cannot be outside of the base directory:
792-
793- * `FileStore.check_path()` - ensures that a path is inside the base
794- directory
795-
796- * `FileStore.join()` - creates a path relative to the base directory,
797- ensures resulting path is inside the base directory
798-
799- * `FileStore.create_parent()` - creates a file's parent directory only if
800- that parent directory is inside the base directory
801-
802-Each line of defense is designed to fully prevent path traversals, assumes the
803-other defense doesn't exist or will fail. Together, they should provide a
804-strong defense against path traversal attacks.
805-
806-If you discover any security vulnerability in dmedia, please immediately file a
807-bug:
808-
809- https://bugs.launchpad.net/dmedia/+filebug
810-"""
811-
812-import os
813-from os import path
814-import stat
815-import tempfile
816-from hashlib import sha1
817-from base64 import b32encode, b32decode
818-import json
819-import re
820-import logging
821-from subprocess import check_call, CalledProcessError
822-from threading import Thread
823-from Queue import Queue
824-
825-from .errors import AmbiguousPath, FileStoreTraversal
826-from .errors import DuplicateFile, IntegrityError
827-from .constants import LEAF_SIZE, TYPE_ERROR, EXT_PAT
828-from .constants import TRANSFERS_DIR, IMPORTS_DIR, WRITES_DIR
829-
830-B32LENGTH = 32 # Length of base32-encoded hash
831-QUICK_ID_CHUNK = 2 ** 20 # Amount to read for quick_id()
832-FALLOCATE = '/usr/bin/fallocate'
833-EXT_RE = re.compile(EXT_PAT)
834-log = logging.getLogger()
835-
836-
837-def safe_path(pathname):
838- """
839- Ensure that *pathname* is a normalized absolute path.
840-
841- This is to help protect against path-traversal attacks and to prevent use of
842- ambiguous relative paths.
843-
844- For example, if *pathname* is not a normalized absolute path,
845- `AmbiguousPath` is raised:
846-
847- >>> safe_path('/foo/../root')
848- Traceback (most recent call last):
849- ...
850- AmbiguousPath: '/foo/../root' resolves to '/root'
851-
852- Otherwise *pathname* is returned unchanged:
853-
854- >>> safe_path('/foo/bar')
855- '/foo/bar'
856-
857- Also see `safe_open()`.
858- """
859- if path.abspath(pathname) != pathname:
860- raise AmbiguousPath(pathname=pathname, abspath=path.abspath(pathname))
861- return pathname
862-
863-
864-def safe_open(filename, mode):
865- """
866- Only open file if *filename* is a normalized absolute path.
867-
868- This is to help protect against path-traversal attacks and to prevent use of
869- ambiguous relative paths.
870-
871- Prior to opening the file, *filename* is checked with `safe_path()`. If
872- it's not an absolute normalized path, `AmbiguousPath` is raised:
873-
874- >>> safe_open('/foo/../root', 'rb')
875- Traceback (most recent call last):
876- ...
877- AmbiguousPath: '/foo/../root' resolves to '/root'
878-
879- Otherwise returns a ``file`` instance created with ``open()``.
880- """
881- return open(safe_path(filename), mode)
882-
883-
884-def safe_ext(ext):
885- r"""
886- Verify that extension *ext* contains only lowercase ascii letters, digits.
887-
888- A malicious *ext* could cause path traversal or other security gotchas,
889- thus this sanity check. When *wav* is valid, it is returned unchanged:
890-
891- >>> safe_ext('ogv')
892- 'ogv'
893- >>> safe_ext('tar.gz')
894- 'tar.gz'
895-
896- However, when *ext* does not conform, a ``TypeError`` or ``ValueError`` is
897- raised:
898-
899- >>> safe_ext('/../.ssh')
900- Traceback (most recent call last):
901- ...
902- ValueError: ext '/../.ssh' does not match pattern '^[a-z0-9]+(\\.[a-z0-9]+)?$'
903-
904- Also see `safe_b32()`.
905- """
906- if not isinstance(ext, basestring):
907- raise TypeError(
908- TYPE_ERROR % ('ext', basestring, type(ext), ext)
909- )
910- if not EXT_RE.match(ext):
911- raise ValueError(
912- 'ext %r does not match pattern %r' % (ext, EXT_PAT)
913- )
914- return ext
915-
916-
917-def safe_b32(b32):
918- """
919- Verify that *b32* is valid base32-encoding and correct length.
920-
921- A malicious *b32* could cause path traversal or other security gotchas,
922- thus this sanity check. When *b2* is valid, it is returned unchanged:
923-
924- >>> safe_b32('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW')
925- 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW'
926-
927- However, when *b32* does not conform, a ``TypeError`` or ``ValueError`` is
928- raised:
929-
930- >>> safe_b32('NWBNVXVK5DQGIOW7MYR4K3KA')
931- Traceback (most recent call last):
932- ...
933- ValueError: len(b32) must be 32; got 24: 'NWBNVXVK5DQGIOW7MYR4K3KA'
934-
935- Also see `safe_ext()`.
936- """
937- if not isinstance(b32, basestring):
938- raise TypeError(
939- TYPE_ERROR % ('b32', basestring, type(b32), b32)
940- )
941- try:
942- b32decode(b32)
943- except TypeError as e:
944- raise ValueError('b32: cannot b32decode %r: %s' % (b32, e))
945- if len(b32) != B32LENGTH:
946- raise ValueError('len(b32) must be %d; got %d: %r' %
947- (B32LENGTH, len(b32), b32)
948- )
949- return b32
950-
951-
952-def tophash_personalization(file_size):
953- """
954- Personalize the top-hash with *file_size*.
955-
956- For example:
957-
958- >>> tophash_personalization(3141)
959- 'dmedia/tophash 3141'
960-
961- This is used to cryptographically tie ``doc['bytes']`` to ``doc['_id']``.
962- You can't change the leaves or the file size without affecting the top-hash.
963-
964- The personalization is designed to be easy to implement in JavaScript. For
965- example, this is the equivalent JavaScript function:
966-
967- ::
968-
969- function tophash_personalization(file_size) {
970- return ['dmedia/tophash', file_size].join(' ');
971- }
972-
973- When hashing with Skein, this value would be used for the Skein
974- personalization parameter. See PySkein and the Skein specification for
975- details:
976-
977- http://packages.python.org/pyskein/
978-
979- http://www.skein-hash.info/
980-
981- When hashing with sha1, the top-hash is calculated like this:
982-
983- >>> from hashlib import sha1
984- >>> from base64 import b32encode
985- >>> pers = tophash_personalization(3141)
986- >>> leaves = b'pretend this is the concatenated leaves'
987- >>> b32encode(sha1(pers + leaves).digest()) # The top-hash
988- 'M55ORBTYICEDQ2WUREDYIYYO6VUJ3R6S'
989-
990- :param file_size: the file size in bytes (an ``int``)
991- """
992- return ' '.join(['dmedia/tophash', str(file_size)]).encode('utf-8')
993-
994-
995-def tophash(file_size):
996- """
997- Initialize hash for a file that is *file_size* bytes.
998- """
999- return sha1(tophash_personalization(file_size))
1000-
1001-
1002-def leafhash_personalization(file_size, leaf_index):
1003- """
1004- Personalize the leaf-hash with *file_size* and *leaf_index*.
1005-
1006- For example:
1007-
1008- >>> leafhash_personalization(3141, 0)
1009- 'dmedia/leafhash 3141 0'
1010-
1011- :param file_size: the file size in bytes (an ``int``)
1012- :param leaf_index: the index of this leaf (an ``int``, starting at zero)
1013- """
1014- return ' '.join(
1015- ['dmedia/leafhash', str(file_size), str(leaf_index)]
1016- ).encode('utf-8')
1017-
1018-
1019-def leafhash(file_size, leaf_index):
1020- """
1021- Initialize hash for the *leaf_index* leaf in a file of *file_size* bytes.
1022- """
1023- return sha1(leafhash_personalization(file_size, leaf_index))
1024-
1025-
1026-class HashList(object):
1027- """
1028- Simple hash-list (a 1-deep tree-hash).
1029-
1030- For swarm upload/download, we need to keep the content hashes of the
1031- individual leaves, a list of which is available via the ``HashList.leaves``
1032- attribute after `HashList.run()` has been called.
1033-
1034- The effective content-hash for the entire file is a hash of the leaf hashes
1035- concatenated together. This is handy because it gives us a
1036- cryptographically strong way to associate individual leaves with the file
1037- "_id". This is important because otherwise malicious peers could pollute
1038- the network with invalid leaves, but victims wouldn't know anything was
1039- wrong till the entire file was downloaded. The whole file would fail to
1040- verify, and worse, the victim would have no way of knowing which leaves were
1041- invalid.
1042-
1043- In order to maximize IO utilization, the hash is computed in two threads.
1044- The main thread reads chunks from *src_fp* and puts them into a queue. The
1045- 2nd thread gets chunks from the queue, updates the hash, and then optionally
1046- writes the chunk to *dst_fp* if one was provided when the `HashList` was
1047- created.
1048-
1049- For some background, see:
1050-
1051- https://bugs.launchpad.net/dmedia/+bug/704272
1052-
1053- For more information about hash-lists and tree-hashes, see:
1054-
1055- http://en.wikipedia.org/wiki/Hash_list
1056-
1057- http://en.wikipedia.org/wiki/Tree_hash
1058- """
1059-
1060- def __init__(self, src_fp, dst_fp=None, leaf_size=LEAF_SIZE):
1061- if not isinstance(src_fp, file):
1062- raise TypeError(
1063- TYPE_ERROR % ('src_fp', file, type(src_fp), src_fp)
1064- )
1065- if src_fp.mode != 'rb':
1066- raise ValueError(
1067- "src_fp: mode must be 'rb'; got %r" % src_fp.mode
1068- )
1069- if dst_fp is not None:
1070- if not isinstance(dst_fp, file):
1071- raise TypeError(
1072- TYPE_ERROR % ('dst_fp', file, type(dst_fp), dst_fp)
1073- )
1074- if dst_fp.mode not in ('wb', 'r+b'):
1075- raise ValueError(
1076- "dst_fp: mode must be 'wb' or 'r+b'; got %r" % dst_fp.mode
1077- )
1078- self.src_fp = src_fp
1079- self.dst_fp = dst_fp
1080- self.leaf_size = leaf_size
1081- self.file_size = os.fstat(src_fp.fileno()).st_size
1082- self.h = tophash(self.file_size)
1083- self.leaves = []
1084- self.q = Queue(4)
1085- self.thread = Thread(target=self.hashing_thread)
1086- self.thread.daemon = True
1087- self.__ran = False
1088-
1089- def update(self, chunk):
1090- """
1091- Update hash with *chunk*, optionally write to dst_fp.
1092-
1093- This will append the content-hash of *chunk* to ``HashList.leaves`` and
1094- update the top-hash.
1095-
1096- If the `HashList` was created with a *dst_fp*, *chunk* will be will be
1097- written to *dst_fp*.
1098-
1099- `HashList.hashing_thread()` calls this method once for each chunk in the
1100- queue. This functionality is in its own method simply to make testing
1101- easier.
1102- """
1103- digest = sha1(chunk).digest()
1104- self.h.update(digest)
1105- self.leaves.append(digest)
1106- if self.dst_fp is not None:
1107- self.dst_fp.write(chunk)
1108-
1109- def hashing_thread(self):
1110- while True:
1111- chunk = self.q.get()
1112- if not chunk:
1113- break
1114- self.update(chunk)
1115-
1116- def run(self):
1117- assert self.__ran is False
1118- self.__ran = True
1119- self.src_fp.seek(0) # Make sure we are at beginning of file
1120- self.thread.start()
1121- while True:
1122- chunk = self.src_fp.read(self.leaf_size)
1123- self.q.put(chunk)
1124- if not chunk:
1125- break
1126- self.thread.join()
1127- return b32encode(self.h.digest())
1128-
1129-
1130-def pack_leaves(leaves, digest_bytes=20):
1131- """
1132- Pack leaves together into a ``bytes`` instance for CouchDB attachment.
1133-
1134- :param leaves: a ``list`` containing content-hash of each leaf in the file
1135- (content-hash is binary digest, not base32-encoded)
1136- :param digest_bytes: digest size in bytes; default is 20 (160 bits)
1137- """
1138- for (i, leaf) in enumerate(leaves):
1139- if len(leaf) != digest_bytes:
1140- raise ValueError('digest_bytes=%d, but len(leaves[%d]) is %d' % (
1141- digest_bytes, i, len(leaf)
1142- )
1143- )
1144- return ''.join(leaves)
1145-
1146-
1147-def unpack_leaves(data, digest_bytes=20):
1148- """
1149- Unpack binary *data* into a list of leaf digests.
1150-
1151- :param data: a ``bytes`` instance containing the packed leaf digests
1152- :param digest_bytes: digest size in bytes; default is 20 (160 bits)
1153- """
1154- assert isinstance(data, bytes)
1155- if len(data) % digest_bytes != 0:
1156- raise ValueError(
1157- 'len(data)=%d, not multiple of digest_bytes=%d' % (
1158- len(data), digest_bytes
1159- )
1160- )
1161- return [
1162- data[i*digest_bytes : (i+1)*digest_bytes]
1163- for i in xrange(len(data) / digest_bytes)
1164- ]
1165-
1166-
1167-def quick_id(fp):
1168- """
1169- Compute a quick reasonably unique ID for the open file *fp*.
1170- """
1171- if not isinstance(fp, file):
1172- raise TypeError(
1173- TYPE_ERROR % ('fp', file, type(fp), fp)
1174- )
1175- if fp.mode != 'rb':
1176- raise ValueError("fp: must be opened in mode 'rb'; got %r" % fp.mode)
1177- fp.seek(0) # Make sure we are at beginning of file
1178- h = sha1()
1179- size = os.fstat(fp.fileno()).st_size
1180- h.update(str(size).encode('utf-8'))
1181- h.update(fp.read(QUICK_ID_CHUNK))
1182- return b32encode(h.digest())
1183-
1184-
1185-def fallocate(size, filename):
1186- """
1187- Attempt to efficiently preallocate file *filename* to *size* bytes.
1188-
1189- If the fallocate command is available, it will always at least create an
1190- empty file (the equivalent of ``touch filename``), even the file-system
1191- doesn't support pre-allocation.
1192- """
1193- if not isinstance(size, (int, long)):
1194- raise TypeError(
1195- TYPE_ERROR % ('size', (int, long), type(size), size)
1196- )
1197- if size <= 0:
1198- raise ValueError('size must be >0; got %r' % size)
1199- filename = safe_path(filename)
1200- if not path.isfile(FALLOCATE):
1201- return None
1202- try:
1203- check_call([FALLOCATE, '-l', str(size), filename])
1204- return True
1205- except CalledProcessError:
1206- return False
1207-
1208-
1209-class FileStore(object):
1210- """
1211- Arranges files in a special layout according to their content-hash.
1212-
1213- Security note: this class must be carefully designed to prevent path
1214- traversal!
1215-
1216- To create a `FileStore`, you give it the directory that will be its base on
1217- the filesystem:
1218-
1219- >>> fs = FileStore('/home/jderose') #doctest: +SKIP
1220- >>> fs.base #doctest: +SKIP
1221- '/home/jderose/.dmedia'
1222-
1223- If you don't supply *base*, a temporary directory will be created for you:
1224-
1225- >>> fs = FileStore()
1226- >>> fs.base #doctest: +ELLIPSIS
1227- '/tmp/.../.dmedia'
1228-
1229- You can add files to the store using `FileStore.import_file()`:
1230-
1231- >>> from dmedia.tests import sample_mov # Sample .MOV file
1232- >>> src_fp = open(sample_mov, 'rb')
1233- >>> fs.import_file(src_fp, 'mov') #doctest: +ELLIPSIS
1234- ('TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK', [...])
1235-
1236- And when you have the content-hash and extension, you can retrieve the full
1237- path of the file using `FileStore.path()`:
1238-
1239- >>> fs.path('HIGJPQWY4PI7G7IFOB2G4TKY6PMTJSI7', 'mov') #doctest: +ELLIPSIS
1240- '/tmp/.../.dmedia/HI/GJPQWY4PI7G7IFOB2G4TKY6PMTJSI7.mov'
1241-
1242- As the files are assumed to be read-only and unchanging, moving a file into
1243- its canonical location must be atomic. There are 2 scenarios that must be
1244- considered:
1245-
1246- 1. Imports - we compute the content-hash as we copy the file into the
1247- `FileStore`, so this requires a randomly-named temporary file. When
1248- the copy completes, file is renamed to its canonical name.
1249-
1250- 2. Transfers - as uploads/downloads might stop or fail and then be
1251- resumed, this requires a canonically-named temporary file. As the
1252- file content-hash is already known (we have its meta-data in
1253- CouchDB), the temporary file is named by the content-hash. Once
1254- download completes, file is renamed to its canonical name.
1255-
1256- In both scenarios, the file size will be known when the temporary file is
1257- created, so an attempt is made to preallocate the entire file using the
1258- `fallocate()` function, which calls the Linux ``fallocate`` command.
1259- """
1260-
1261- def __init__(self, parent=None, dotdir='.dmedia'):
1262- if parent is None:
1263- parent = tempfile.mkdtemp(prefix='store.')
1264- self.parent = safe_path(parent)
1265- if not path.isdir(self.parent):
1266- raise ValueError('%s.parent not a directory: %r' %
1267- (self.__class__.__name__, parent)
1268- )
1269- self.base = path.join(self.parent, dotdir)
1270- try:
1271- os.mkdir(self.base)
1272- except OSError:
1273- pass
1274- if not path.isdir(self.base):
1275- raise ValueError('%s.base not a directory: %r' %
1276- (self.__class__.__name__, self.base)
1277- )
1278- if path.islink(self.base):
1279- raise ValueError('{!r} is symlink to {!r}'.format(
1280- self.base, os.readlink(self.base)
1281- )
1282- )
1283-
1284- def __repr__(self):
1285- return '%s(%r)' % (self.__class__.__name__, self.parent)
1286-
1287- ############################################
1288- # Methods to prevent path traversals attacks
1289- def check_path(self, pathname):
1290- """
1291- Verify that *pathname* in inside this filestore base directory.
1292- """
1293- abspath = path.abspath(pathname)
1294- if abspath.startswith(self.base + os.sep):
1295- return abspath
1296- raise FileStoreTraversal(
1297- pathname=pathname, base=self.base, abspath=abspath
1298- )
1299-
1300- def join(self, *parts):
1301- """
1302- Safely join *parts* with base directory to prevent path traversal.
1303-
1304- For security reasons, it's very important that you use this method
1305- rather than ``path.join()`` directly. This method will prevent path
1306- traversal attacks, ``path.join()`` will not.
1307-
1308- For example:
1309-
1310- >>> fs = FileStore()
1311- >>> fs.join('NW', 'BNVXVK5DQGIOW7MYR4K3KA5K22W7NW') #doctest: +ELLIPSIS
1312- '/tmp/.../.dmedia/NW/BNVXVK5DQGIOW7MYR4K3KA5K22W7NW'
1313-
1314- However, a `FileStoreTraversal` is raised if *parts* cause a path
1315- traversal outside of the `FileStore` base directory:
1316-
1317- >>> fs.join('../ssh') #doctest: +ELLIPSIS
1318- Traceback (most recent call last):
1319- ...
1320- FileStoreTraversal: '/tmp/.../ssh' outside base '/tmp/.../.dmedia'
1321-
1322- Or Likewise if an absolute path is included in *parts*:
1323-
1324- >>> fs.join('NW', '/etc', 'ssh') #doctest: +ELLIPSIS
1325- Traceback (most recent call last):
1326- ...
1327- FileStoreTraversal: '/etc/ssh' outside base '/tmp/.../.dmedia'
1328-
1329- Also see `FileStore.create_parent()`.
1330- """
1331- fullpath = path.join(self.base, *parts)
1332- return self.check_path(fullpath)
1333-
1334- def create_parent(self, filename):
1335- """
1336- Safely create the directory containing *filename*.
1337-
1338- To prevent path traversal attacks, this method will only create
1339- directories within the `FileStore` base directory. For example:
1340-
1341- >>> fs = FileStore()
1342- >>> fs.create_parent('/foo/my.ogv') #doctest: +ELLIPSIS
1343- Traceback (most recent call last):
1344- ...
1345- FileStoreTraversal: '/foo/my.ogv' outside base '/tmp/.../.dmedia'
1346-
1347- It also protects against malicious filenames like this:
1348-
1349- >>> fs.create_parent('/foo/../bar/my.ogv') #doctest: +ELLIPSIS
1350- Traceback (most recent call last):
1351- ...
1352- FileStoreTraversal: '/bar/my.ogv' outside base '/tmp/.../.dmedia'
1353-
1354- If doesn't already exists, the directory containing *filename* is
1355- created. Returns the directory containing *filename*.
1356-
1357- Also see `FileStore.join()`.
1358- """
1359- filename = self.check_path(filename)
1360- containing = path.dirname(filename)
1361- if not path.exists(containing):
1362- os.makedirs(containing)
1363- return containing
1364-
1365-
1366- #################################################
1367- # Methods for working with files in the FileStore
1368- @staticmethod
1369- def relpath(chash, ext=None):
1370- """
1371- Relative path of file with *chash*, ending with *ext*.
1372-
1373- For example:
1374-
1375- >>> FileStore.relpath('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW')
1376- ('NW', 'BNVXVK5DQGIOW7MYR4K3KA5K22W7NW')
1377-
1378- Or with the file extension *ext*:
1379-
1380- >>> FileStore.relpath('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW', ext='mov')
1381- ('NW', 'BNVXVK5DQGIOW7MYR4K3KA5K22W7NW.mov')
1382-
1383- Also see `FileStore.reltmp()`.
1384-
1385- :param chash: base32-encoded content-hash
1386- :param ext: normalized lowercase file extension, eg ``'mov'``
1387- """
1388- chash = safe_b32(chash)
1389- dname = chash[:2]
1390- fname = chash[2:]
1391- if ext:
1392- return (dname, '.'.join((fname, safe_ext(ext))))
1393- return (dname, fname)
1394-
1395- def path(self, chash, ext=None, create=False):
1396- """
1397- Returns path of file with content-hash *chash* and extension *ext*.
1398-
1399- For example:
1400-
1401- >>> fs = FileStore()
1402- >>> fs.path('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW') #doctest: +ELLIPSIS
1403- '/tmp/.../.dmedia/NW/BNVXVK5DQGIOW7MYR4K3KA5K22W7NW'
1404-
1405- Or with a file extension:
1406-
1407- >>> fs.path('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW', 'txt') #doctest: +ELLIPSIS
1408- '/tmp/.../.dmedia/NW/BNVXVK5DQGIOW7MYR4K3KA5K22W7NW.txt'
1409-
1410- If called with ``create=True``, the parent directory is created with
1411- `FileStore.create_parent()`.
1412-
1413- :param chash: base32-encoded content-hash
1414- :param ext: normalized lowercase file extension, eg ``'mov'``
1415- :param create: if ``True``, create parent directory if it does not
1416- already exist; default is ``False``
1417- """
1418- filename = self.join(*self.relpath(chash, ext))
1419- if create:
1420- self.create_parent(filename)
1421- return filename
1422-
1423- def exists(self, chash, ext=None):
1424- """
1425- Return ``True`` if a file with *chash* and *ext* exists.
1426-
1427- :param chash: base32-encoded content-hash
1428- :param ext: normalized lowercase file extension, eg ``'mov'``
1429- """
1430- return path.isfile(self.path(chash, ext))
1431-
1432- def open(self, chash, ext=None):
1433- """
1434- Open the file with *chash* and *ext* in ``'rb'`` mode.
1435-
1436- :param chash: base32-encoded content-hash
1437- :param ext: normalized lowercase file extension, eg ``'mov'``
1438- """
1439- return open(self.path(chash, ext), 'rb')
1440-
1441- def verify(self, chash, ext=None):
1442- """
1443- Verify integrity of file with *chash* and *ext*.
1444-
1445- If the file's content-hash does not equal *chash*, an `IntegrityError`
1446- is raised.
1447-
1448- Otherwise, the open ``file`` is returned after calling ``file.seek(0)``
1449- to put read position back at the start of the file.
1450-
1451- :param chash: base32-encoded content-hash
1452- :param ext: normalized lowercase file extension, eg ``'mov'``
1453- """
1454- src_fp = self.open(chash, ext)
1455- h = HashList(src_fp)
1456- got = h.run()
1457- if got != chash:
1458- corrupted = self.join(*self.reltmp2('corrupted', chash, ext))
1459- self.create_parent(corrupted)
1460- os.rename(src_fp.name, corrupted)
1461- src_fp.close()
1462- raise IntegrityError(got=got, expected=chash, filename=corrupted)
1463- src_fp.seek(0)
1464- return src_fp
1465-
1466- def remove(self, chash, ext=None):
1467- """
1468- Delete file with *chash* and *ext* from underlying filesystem.
1469-
1470- :param chash: base32-encoded content-hash
1471- :param ext: normalized lowercase file extension, eg ``'mov'``
1472- """
1473- filename = self.path(chash, ext)
1474- log.info('Deleting file %r from %r', filename, self)
1475- os.remove(filename)
1476-
1477-
1478- ###########################################################
1479- # Methods for working with temporary files in the FileStore
1480- @staticmethod
1481- def reltmp(chash, ext=None):
1482- """
1483- Relative path of temporary file with *chash*, ending with *ext*.
1484-
1485- For example:
1486-
1487- >>> FileStore.reltmp('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW')
1488- ('transfers', 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW')
1489-
1490- Or with the file extension *ext*:
1491-
1492- >>> FileStore.reltmp('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW', ext='mov')
1493- ('transfers', 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW.mov')
1494-
1495- Also see `FileStore.relpath()`.
1496-
1497- :param chash: base32-encoded content-hash
1498- :param ext: normalized lowercase file extension, eg ``'mov'``
1499- """
1500- chash = safe_b32(chash)
1501- if ext:
1502- return (TRANSFERS_DIR, '.'.join([chash, safe_ext(ext)]))
1503- return (TRANSFERS_DIR, chash)
1504-
1505- @staticmethod
1506- def reltmp2(state, chash, ext=None):
1507- assert state in ('transfers', 'corrupted')
1508- chash = safe_b32(chash)
1509- if ext:
1510- return (state, '.'.join([chash, safe_ext(ext)]))
1511- return (state, chash)
1512-
1513- def tmp(self, chash, ext=None, create=False):
1514- """
1515- Returns path of temporary file with *chash*, ending with *ext*.
1516-
1517- These temporary files are used for file transfers between dmedia peers,
1518- in which case the content-hash is already known. For example:
1519-
1520- >>> fs = FileStore()
1521- >>> fs.tmp('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW') #doctest: +ELLIPSIS
1522- '/tmp/.../.dmedia/transfers/NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW'
1523-
1524- Or with a file extension:
1525-
1526- >>> fs.tmp('NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW', 'txt') #doctest: +ELLIPSIS
1527- '/tmp/.../.dmedia/transfers/NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW.txt'
1528-
1529- If called with ``create=True``, the parent directory is created with
1530- `FileStore.create_parent()`.
1531-
1532- :param chash: base32-encoded content-hash
1533- :param ext: normalized lowercase file extension, eg ``'mov'``
1534- :param create: if ``True``, create parent directory if it does not
1535- already exist; default is ``False``
1536- """
1537- filename = self.join(*self.reltmp(chash, ext))
1538- if create:
1539- self.create_parent(filename)
1540- return filename
1541-
1542- def allocate_for_transfer(self, size, chash, ext=None):
1543- """
1544- Open the canonical temporary file for a transfer (download or upload).
1545-
1546- When transferring files from other dmedia peers, the content-hash is
1547- already known. As we must be able to easily resume a download or
1548- upload, transfers use a stable, canonical temporary filename derived
1549- from the content-hash and file extension.
1550-
1551- The file *size* is also known, so an attempt is made to efficiently
1552- pre-allocate the temporary file using `fallocate()`.
1553-
1554- If the temporary file already exists, it means we're resuming a
1555- transfer. The file is opened in ``'r+b'`` mode, leaving data in the
1556- temporary file intact. It is the responsibility of higher-level code
1557- to verify the file leaf by leaf in order to determine what portions of
1558- the file have been transfered, what portions of the file still need to
1559- be transferred.
1560-
1561- Note that as the temporary file will likely be pre-allocated, higher-
1562- level code cannot use the size of the temporary file as a means of
1563- determining how much of the file has been transfered.
1564-
1565- If the temporary does not exist, and cannot be pre-allocated, a new
1566- empty file is opened in ``'wb'`` mode. Higher-level code must check
1567- the mode of the ``file`` instance and act accordingly.
1568-
1569- :param size: file size in bytes (an ``int``)
1570- :param chash: base32-encoded content-hash
1571- :param ext: normalized lowercase file extension, eg ``'mov'``
1572- """
1573- filename = self.tmp(chash, ext, create=True)
1574- fallocate(size, filename)
1575- try:
1576- fp = open(filename, 'r+b')
1577- if os.fstat(fp.fileno()).st_size > size:
1578- fp.truncate(size)
1579- return fp
1580- except IOError:
1581- return open(filename, 'wb')
1582-
1583- def allocate_for_import(self, size, ext=None):
1584- """
1585- Open a random temporary file for an import operation.
1586-
1587- When importing a file, the content-hash is computed as the file is
1588- copied into the `FileStore`. As the content-hash isn't known when
1589- allocating the temporary file, a randomly named temporary file is used.
1590-
1591- However, the file *size* is known, so an attempt is made to efficiently
1592- pre-allocate the temporary file using `fallocate()`.
1593-
1594- The file extension *ext* is optional and serves no other purpose than to
1595- aid in debugging. The value of *ext* used here has no effect on the
1596- ultimate canonical file name.
1597-
1598- :param size: file size in bytes (an ``int``)
1599- :param ext: normalized lowercase file extension, eg ``'mov'``
1600- """
1601- imports = self.join(IMPORTS_DIR)
1602- if not path.exists(imports):
1603- os.makedirs(imports)
1604- suffix = ('' if ext is None else '.' + ext)
1605- (fileno, filename) = tempfile.mkstemp(suffix=suffix, dir=imports)
1606- fallocate(size, filename)
1607- # FIXME: This probably isn't the best approach, but for now it works:
1608- tmp_fp = open(filename, 'r+b')
1609- os.close(fileno)
1610- return tmp_fp
1611-
1612- def allocate_for_write(self, ext=None):
1613- """
1614- Open a random temporary file for a write operation.
1615-
1616- Use this method to allocated a temporary file for cases when the file
1617- size is not known in advance, eg when transcoding or rendering.
1618-
1619- The file extension *ext* is optional and serves no other purpose than to
1620- aid in debugging. The value of *ext* used here has no effect on the
1621- ultimate canonical file name.
1622-
1623- :param ext: normalized lowercase file extension, eg ``'mov'``
1624- """
1625- writes = self.join(WRITES_DIR)
1626- if not path.exists(writes):
1627- os.makedirs(writes)
1628- suffix = ('' if ext is None else '.' + ext)
1629- (fileno, filename) = tempfile.mkstemp(suffix=suffix, dir=writes)
1630- tmp_fp = open(filename, 'r+b')
1631- os.close(fileno)
1632- return tmp_fp
1633-
1634- def tmp_move(self, tmp_fp, chash, ext=None):
1635- """
1636- Move temporary file into its canonical location.
1637-
1638- This method will securely and atomically move a temporary file into its
1639- canonical location.
1640-
1641- For example:
1642-
1643- >>> fs = FileStore()
1644- >>> tmp_fp = open(fs.join('foo.mov'), 'wb')
1645- >>> chash = 'ZR765XWSF6S7JQHLUI4GCG5BHGPE252O'
1646- >>> fs.tmp_move(tmp_fp, chash, 'mov') #doctest: +ELLIPSIS
1647- '/tmp/.../.dmedia/ZR/765XWSF6S7JQHLUI4GCG5BHGPE252O.mov'
1648-
1649- Note, however, that this method does *not* verify the content hash of
1650- the temporary file! This is by design as many operations will compute
1651- the content hash as they write to the temporary file. Other operations
1652- should use `FileStore.tmp_verify_move()` to verify and move in one step.
1653-
1654- Regardless, the full content hash should have been verified prior to
1655- calling this method. To ensure the content is not modified, operations
1656- must take these steps:
1657-
1658- 1. Open *tmp_fp* and keep it open, thereby retaining a lock on the
1659- file
1660-
1661- 2. Compute the full content hash, which can be done as content is
1662- written to *tmp_fp* (open in mode ``'r+b'`` to resume a transfer,
1663- but hashes of previously transferred leaves must still be verified)
1664-
1665- 3. With *tmp_fp* still open, move the temporary file into its
1666- canonical location using this method.
1667-
1668- As a simple locking mechanism, this method takes an open ``file`` rather
1669- than a filename, thereby preventing the file from being modified during
1670- the move. A ``ValueError`` is raised if *tmp_fp* is already closed.
1671-
1672- For portability reasons, this method requires that *tmp_fp* be opened in
1673- a binary mode: ``'rb'``, ``'wb'``, or ``'r+b'``. A ``ValueError`` is
1674- raised if opened in any other mode.
1675-
1676- For security reasons, this method will only move a temporary file
1677- located within the ``FileStore.base`` directory or a subdirectory
1678- thereof. If an attempt is made to move a file from outside the store,
1679- `FileStoreTraversal` is raised. See `FileStore.check_path()`.
1680-
1681- Just prior to moving the file, a call to ``os.fchmod()`` is made to set
1682- read-only permissions (0444). After the move, *tmp_fp* is closed.
1683-
1684- If the canonical file already exists, `DuplicateFile` is raised.
1685-
1686- The return value is the absolute path of the canonical file.
1687-
1688- :param tmp_fp: a ``file`` instance created with ``open()``
1689- :param chash: base32-encoded content-hash
1690- :param ext: normalized lowercase file extension, eg ``'mov'``
1691- """
1692- # Validate tmp_fp:
1693- if not isinstance(tmp_fp, file):
1694- raise TypeError(
1695- TYPE_ERROR % ('tmp_fp', file, type(tmp_fp), tmp_fp)
1696- )
1697- if tmp_fp.mode not in ('rb', 'wb', 'r+b'):
1698- raise ValueError(
1699- "tmp_fp: mode must be 'rb', 'wb', or 'r+b'; got %r" % tmp_fp.mode
1700- )
1701- if tmp_fp.closed:
1702- raise ValueError('tmp_fp is closed, must be open: %r' % tmp_fp.name)
1703- self.check_path(tmp_fp.name)
1704-
1705- # Get canonical name, check for duplicate:
1706- dst = self.path(chash, ext, create=True)
1707- if path.exists(dst):
1708- raise DuplicateFile(chash=chash, src=tmp_fp.name, dst=dst)
1709-
1710- # Set file to read-only (0444) and move into canonical location
1711- log.info('Moving file %r to %r', tmp_fp.name, dst)
1712- fileno = tmp_fp.fileno()
1713- tmp_fp.flush()
1714- os.fsync(fileno)
1715- os.fchmod(fileno, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
1716- os.rename(tmp_fp.name, dst)
1717- tmp_fp.close()
1718-
1719- # Return canonical filename:
1720- return dst
1721-
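In code, the three steps above look roughly like this (a sketch using names from this module):

    tmp_fp = open(fs.tmp(chash, 'mov'), 'r+b')  # 1. open fp acts as the lock
    assert HashList(tmp_fp).run() == chash      # 2. verify the full content hash
    dst = fs.tmp_move(tmp_fp, chash, 'mov')     # 3. move while tmp_fp is still open
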
1722- def tmp_hash_move(self, tmp_fp, ext=None):
1723- """
1724- Hash temporary file, then move into its canonical location.
1725- """
1726- h = HashList(tmp_fp)
1727- chash = h.run()
1728- self.tmp_move(tmp_fp, chash, ext)
1729- return (chash, h.leaves)
1730-
1731- def tmp_verify_move(self, chash, ext=None):
1732- """
1733- Verify temporary file, then move into its canonical location.
1734-
1735- This method will check the content hash of the canonically-named
1736- temporary file with content hash *chash* and extension *ext*. If the
1737- content hash is correct, this method will then move the temporary file
1738- into its canonical location using `FileStore.tmp_move()`.
1739-
1740- If the content hash is incorrect, `IntegrityError` is raised. If the
1741- canonical file already exists, `DuplicateFile` is raised. Lastly, if
1742- the temporary file does not exist, ``IOError`` is raised.
1743-
1744- This method will typically be used with the BitTorrent downloader or
1745- similar, in which case the content hash will be known prior to
1746- downloading. The downloader will first determine the canonical
1747- temporary file name, like this:
1748-
1749- >>> fs = FileStore()
1750- >>> tmp = fs.tmp('TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK', 'mov', create=True)
1751- >>> tmp #doctest: +ELLIPSIS
1752- '/tmp/.../.dmedia/transfers/TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK.mov'
1753-
1754- Then the downloader will write to the temporary file as it's being
1755- downloaded:
1756-
1757- >>> from dmedia.tests import sample_mov # Sample .MOV file
1758- >>> src_fp = open(sample_mov, 'rb')
1759- >>> tmp_fp = open(tmp, 'wb')
1760- >>> while True:
1761- ... chunk = src_fp.read(2**20) # Read in 1MiB chunks
1762- ... if not chunk:
1763- ... break
1764- ... tmp_fp.write(chunk)
1765- ...
1766- >>> tmp_fp.close()
1767-
1768- Finally, the downloader will move the temporary file into its canonical
1769- location:
1770-
1771- >>> dst = fs.tmp_verify_move('TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK', 'mov')
1772- >>> dst #doctest: +ELLIPSIS
1773- '/tmp/.../.dmedia/TG/X33XXWU3EVHEEY5J7NBOJGKBFXLEBK.mov'
1774-
1775- The return value is the absolute path of the canonical file.
1776-
1777- :param chash: base32-encoded content-hash
1778- :param ext: normalized lowercase file extension, eg ``'mov'``
1779- """
1780- tmp = self.tmp(chash, ext)
1781- tmp_fp = open(tmp, 'rb')
1782- h = HashList(tmp_fp)
1783- got = h.run()
1784- if got != chash:
1785- raise IntegrityError(got=got, expected=chash, filename=tmp_fp.name)
1786- return self.tmp_move(tmp_fp, chash, ext)
1787-
1788- def import_file(self, src_fp, ext=None):
1789- """
1790- Atomically copy open file *src_fp* into this file store.
1791-
1792- The method will compute the content-hash of *src_fp* as it copies it to
1793- a temporary file within this store. Once the copying is complete, the
1794- temporary file will be moved to its canonical location using
1795- `FileStore.tmp_move()`.
1796-
1797- A `DuplicateFile` exception will be raised if the file already exists
1798- in this store.
1799-
1800- This method returns a ``(chash, leaves)`` tuple with the content hash
1801- (top-hash) and a list of the content hashes of the leaves. See
1802- `HashList` for details.
1803-
1804- Note that *src_fp* must have been opened in ``'rb'`` mode.
1805-
1806- :param src_fp: a ``file`` instance created with ``open()``
1807- :param ext: normalized lowercase file extension, eg ``'mov'``
1808- """
1809- size = os.fstat(src_fp.fileno()).st_size
1810- tmp_fp = self.allocate_for_import(size, ext)
1811- h = HashList(src_fp, tmp_fp)
1812- log.info('Importing file %r into %r', src_fp.name, self)
1813- chash = h.run()
1814- try:
1815- self.tmp_move(tmp_fp, chash, ext)
1816- except DuplicateFile as e:
1817- log.warning('File %r is duplicate of %r', src_fp.name, e.dst)
1818- raise DuplicateFile(src=src_fp.name, dst=e.dst, tmp=e.src,
1819- chash=chash, leaves=h.leaves
1820- )
1821- return (chash, h.leaves)
1822
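Typical use of `import_file()` looks like this (a sketch; the path is illustrative):

    src_fp = open('/media/EOS_DIGITAL/DCIM/100EOS5D2/MVI_5751.MOV', 'rb')
    (chash, leaves) = fs.import_file(src_fp, 'mov')
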
1823=== modified file 'dmedia/importer.py'
1824--- dmedia/importer.py 2011-09-16 04:50:48 +0000
1825+++ dmedia/importer.py 2011-09-22 11:43:29 +0000
1826@@ -25,352 +25,20 @@
1827 Store media files based on content-hash.
1828 """
1829
1830-import os
1831-from os import path
1832-import mimetypes
1833 import time
1834-from base64 import b64encode
1835+from copy import deepcopy
1836+from subprocess import check_call
1837 import logging
1838
1839 import microfiber
1840-
1841-from .schema import (
1842- random_id, create_file, create_batch, create_import, create_drive,
1843- create_partition
1844-)
1845-from .errors import DuplicateFile
1846-from .workers import (
1847- CouchWorker, CouchManager, register, isregistered, exception_name
1848-)
1849-from .filestore import FileStore, quick_id, safe_open, safe_ext, pack_leaves
1850-from .extractor import merge_metadata
1851-
1852-mimetypes.init()
1853-DOTDIR = '.dmedia'
1854+from filestore import FileStore, scandir, batch_import_iter
1855+
1856+from dmedia import workers, schema
1857+
1858+
1859 log = logging.getLogger()
1860
1861
1862-# FIXME: This needs to be done with some real inspection of the file contents,
1863-# but this is just a stopgap for the sake of getting the schema stable:
1864-MEDIA_MAP = {
1865- 'ogv': 'video',
1866- 'mov': 'video',
1867- 'avi': 'video',
1868- 'mts': 'video',
1869-
1870- 'oga': 'audio',
1871- 'flac': 'audio',
1872- 'wav': 'audio',
1873- 'mp3': 'audio',
1874-
1875- 'jpg': 'image',
1876- 'cr2': 'image',
1877- 'png': 'image',
1878-}
1879-
1880-
1881-def normalize_ext(name):
1882- """
1883- Return (root, ext) from *name* where extension is normalized to lower-case.
1884-
1885- If *name* has no extension, ``None`` is returned as 2nd item in (root, ext)
1886- tuple:
1887-
1888- >>> normalize_ext('IMG_2140.CR2')
1889- ('IMG_2140', 'cr2')
1890- >>> normalize_ext('test.jpg')
1891- ('test', 'jpg')
1892- >>> normalize_ext('hello_world')
1893- ('hello_world', None)
1894- """
1895- parts = name.rsplit('.', 1)
1896- if len(parts) == 2:
1897- (root, ext) = parts
1898- if root and ext:
1899- try:
1900- return (root, safe_ext(ext.lower()))
1901- except (ValueError, TypeError):
1902- pass
1903- return (name, None)
1904-
1905-
1906-def scanfiles(base, extensions=None):
1907- """
1908- Recursively iterate through files in directory *base*.
1909- """
1910- try:
1911- names = sorted(os.listdir(base))
1912- except StandardError:
1913- return
1914- dirs = []
1915- for name in names:
1916- if name.startswith('.') or name.endswith('~'):
1917- continue
1918- fullname = path.join(base, name)
1919- if path.islink(fullname):
1920- continue
1921- if path.isfile(fullname):
1922- (root, ext) = normalize_ext(name)
1923- if extensions is None or ext in extensions:
1924- yield {
1925- 'src': fullname,
1926- 'base': base,
1927- 'root': root,
1928- 'doc': {
1929- 'name': name,
1930- 'ext': ext,
1931- },
1932- }
1933- elif path.isdir(fullname):
1934- dirs.append(fullname)
1935- for fullname in dirs:
1936- for d in scanfiles(fullname, extensions):
1937- yield d
1938-
1939-
1940-def files_iter(base):
1941- """
1942- Recursively iterate through files in directory *base*.
1943-
1944- This is used for importing files from a card, after which the card will be
1945- automatically formatted, so we always import all files to be on the safe
1946- side.
1947-
1948- On the other hand, `scanfiles()` is used for migrating an existing library
1949- to dmedia... in which case we want to be more selective about which files to
1950- consider.
1951-
1952- Note that `files_iter()` does not catch errors like ``OSError``. We
1953- specifically want these errors to propagate up! We don't want a permission
1954- error to be interpreted as there being no files on the card!
1955- """
1956- if path.isfile(base):
1957- s = os.stat(base)
1958- yield (base, s.st_size, s.st_mtime)
1959- return
1960- names = sorted(os.listdir(base))
1961- dirs = []
1962- for name in names:
1963- fullname = path.join(base, name)
1964- if path.islink(fullname):
1965- continue
1966- if path.isfile(fullname):
1967- s = os.stat(fullname)
1968- yield (fullname, s.st_size, s.st_mtime)
1969- elif path.isdir(fullname):
1970- dirs.append(fullname)
1971- for fullname in dirs:
1972- for tup in files_iter(fullname):
1973- yield tup
1974-
1975-
1976-class ImportWorker(CouchWorker):
1977- def __init__(self, env, q, key, args):
1978- super(ImportWorker, self).__init__(env, q, key, args)
1979- (self.base, self.extract) = args
1980- self.filestore = FileStore(self.env['filestore']['path'])
1981- self.filestore_id = self.env['filestore']['_id']
1982-
1983- self.filetuples = None
1984- self._processed = []
1985- self.doc = None
1986- self._id = None
1987-
1988- def execute(self, base, extract=False):
1989- import_id = self.start()
1990- self.emit('started', import_id)
1991-
1992- files = self.scanfiles()
1993- total = len(files)
1994- self.emit('count', import_id, total)
1995-
1996- c = 1
1997- for (src, action) in self.import_all_iter():
1998- self.emit('progress', import_id, c, total,
1999- dict(
2000- action=action,
2001- src=src,
2002- )
2003- )
2004- c += 1
2005-
2006- stats = self.finalize()
2007- self.emit('finished', import_id, stats)
2008-
2009- def save(self):
2010- """
2011- Save current 'dmedia/import' record to CouchDB.
2012- """
2013- self.db.save(self.doc)
2014-
2015- def start(self):
2016- """
2017- Create the initial 'dmedia/import' record, return that record's ID.
2018- """
2019- assert self._id is None
2020- #drive = create_drive(self.base)
2021- #partition = create_partition(self.base)
2022- self.doc = create_import(self.base,
2023- None, #partition['_id'],
2024- batch_id=self.env.get('batch_id'),
2025- machine_id=self.env.get('machine_id'),
2026- )
2027- self._id = self.doc['_id']
2028- self.save()
2029- #try:
2030- # self.db.save(drive)
2031- #except microfiber.Conflict:
2032- # pass
2033- #try:
2034- # self.db.save(partition)
2035- #except microfiber.Conflict:
2036- # pass
2037- return self._id
2038-
2039- def scanfiles(self):
2040- """
2041- Build list of files that will be considered for import.
2042-
2043- After this method has been called, the ``ImportWorker.filetuples``
2044- attribute will contain ``(filename, size, mtime)`` tuples for all files
2045- being considered. This information is saved into the dmedia/import
2046- record to provide a rich audit trail and aid in debugging.
2047- """
2048- assert self.filetuples is None
2049- self.filetuples = tuple(files_iter(self.base))
2050- self.doc['log']['considered'] = [
2051- {'src': src, 'bytes': size, 'mtime': mtime}
2052- for (src, size, mtime) in self.filetuples
2053- ]
2054- total_bytes = sum(size for (src, size, mtime) in self.filetuples)
2055- self.doc['stats']['considered'] = {
2056- 'count': len(self.filetuples), 'bytes': total_bytes
2057- }
2058- self.save()
2059- return self.filetuples
2060-
2061- def _import_file(self, src):
2062- """
2063- Attempt to import *src* into dmedia library.
2064- """
2065- fp = safe_open(src, 'rb')
2066- stat = os.fstat(fp.fileno())
2067- if stat.st_size == 0:
2068- log.warning('File size is zero: %r', src)
2069- return ('empty', None)
2070-
2071- name = path.basename(src)
2072- (root, ext) = normalize_ext(name)
2073- try:
2074- (chash, leaves) = self.filestore.import_file(fp, ext)
2075- action = 'imported'
2076- except DuplicateFile as e:
2077- chash = e.chash
2078- leaves = e.leaves
2079- action = 'skipped'
2080- assert e.tmp.startswith(self.filestore.join('imports'))
2081- # FIXME: We should really probably move this into duplicates/ or
2082- # something and not delete till we verify integrity of what is
2083- # already in the filestore.
2084- os.remove(e.tmp)
2085-
2086- try:
2087- doc = self.db.get(chash)
2088- if self.filestore_id not in doc['stored']:
2089- doc['stored'][self.filestore_id] = {
2090- 'copies': 1,
2091- 'time': time.time(),
2092- }
2093- self.db.save(doc)
2094- return (action, doc)
2095- except microfiber.NotFound as e:
2096- pass
2097-
2098- leaf_hashes = b''.join(leaves)
2099- stored = {
2100- self.filestore_id: {
2101- 'copies': 1,
2102- }
2103- }
2104- doc = create_file(chash, stat.st_size, leaf_hashes, stored, ext=ext)
2105- assert doc['_id'] == chash
2106- doc.update(
2107- import_id=self._id,
2108- mtime=stat.st_mtime,
2109- name=name,
2110- dir=path.relpath(path.dirname(src), self.base),
2111- )
2112- if ext:
2113- doc['content_type'] = mimetypes.types_map.get('.' + ext)
2114- doc['media'] = MEDIA_MAP.get(ext)
2115- if self.extract:
2116- merge_metadata(src, doc)
2117- r = self.db.save(doc)
2118- assert r['id'] == chash
2119- return (action, doc)
2120-
2121- def import_file(self, src, size):
2122- """
2123- Wraps `ImportWorker._import_file()` with error handling and logging.
2124- """
2125- self._processed.append(src)
2126- try:
2127- (action, doc) = self._import_file(src)
2128- if action == 'empty':
2129- entry = src
2130- else:
2131- entry = {
2132- 'src': src,
2133- 'id': doc['_id'],
2134- }
2135- except Exception as e:
2136- log.exception('Error importing %r', src)
2137- action = 'error'
2138- entry = {
2139- 'src': src,
2140- 'name': exception_name(e),
2141- 'msg': str(e),
2142- }
2143- self.doc['log'][action].append(entry)
2144- self.doc['stats'][action]['count'] += 1
2145- self.doc['stats'][action]['bytes'] += size
2146- if action == 'error':
2147- self.save()
2148- return (action, entry)
2149-
2150- def import_all_iter(self):
2151- for (src, size, mtime) in self.filetuples:
2152- (action, entry) = self.import_file(src, size)
2153- yield (src, action)
2154-
2155- def finalize(self):
2156- """
2157- Finalize import and save final import record to CouchDB.
2158-
2159- The method will add the ``"time_end"`` key into the import record and
2160- save it to CouchDB. There will likely also be changes in the
2161- ``"log"`` and ``"stats"`` keys, which will likewise be saved to CouchDB.
2162- """
2163- assert len(self.filetuples) == len(self._processed)
2164- assert list(t[0] for t in self.filetuples) == self._processed
2165- self.doc['time_end'] = time.time()
2166- self.save()
2167- dt = self.doc['time_end'] - self.doc['time']
2168- log.info('Completed import of %r in %d:%02d',
2169- self.base, dt / 60, dt % 60
2170- )
2171- return self.doc['stats']
2172-
2173-
2174-def to_dbus_stats(stats):
2175- return dict(
2176- imported=stats['imported']['count'],
2177- imported_bytes=stats['imported']['bytes'],
2178- skipped=stats['skipped']['count'],
2179- skipped_bytes=stats['skipped']['bytes'],
2180- )
2181-
2182-
2183 def accumulate_stats(accum, stats):
2184 for (key, d) in stats.items():
2185 if key not in accum:
2186@@ -379,45 +47,131 @@
2187 accum[key][k] += v
2188
2189
2190-class ImportManager(CouchManager):
2191+class ImportWorker(workers.CouchWorker):
2192+ def __init__(self, env, q, key, args):
2193+ super().__init__(env, q, key, args)
2194+ self.basedir = args[0]
2195+ self.id = None
2196+ self.doc = None
2197+
2198+ def execute(self, basedir):
2199+ self.start()
2200+ self.scan()
2201+ self.import_all()
2202+
2203+ def start(self):
2204+ self.doc = schema.create_import(self.basedir,
2205+ machine_id=self.env.get('machine_id'),
2206+ batch_id=self.env.get('batch_id'),
2207+ )
2208+ self.id = self.doc['_id']
2209+ self.db.save(self.doc)
2210+ self.emit('started', self.id)
2211+
2212+ def scan(self):
2213+ self.batch = scandir(self.basedir)
2214+ self.doc['stats']['total'] = {
2215+ 'bytes': self.batch.size,
2216+ 'count': self.batch.count,
2217+ }
2218+ self.doc['import_order'] = [file.name for file in self.batch.files]
2219+ self.doc['files'] = dict(
2220+ (file.name, {'bytes': file.size, 'mtime': file.mtime})
2221+ for file in self.batch.files
2222+ )
2223+ self.db.save(self.doc)
2224+ self.emit('scanned', self.batch.count, self.batch.size)
2225+
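After `scan()`, the import doc records each candidate file by name, something like this (path and numbers illustrative):

    doc['files'] = {
        '/media/EOS_DIGITAL/DCIM/100EOS5D2/MVI_5751.MOV': {
            'bytes': 20202333,
            'mtime': 1316689318,
        },
    }
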
2226+ def get_filestores(self):
2227+ stores = []
2228+ for doc in self.env['filestores']:
2229+ fs = FileStore(doc['parentdir'])
2230+ fs.id = doc['_id']
2231+ fs.copies = doc['copies']
2232+ stores.append(fs)
2233+ return stores
2234+
2235+ def import_all(self):
2236+ stores = self.get_filestores()
2237+ try:
2238+ for (status, file, doc) in self.import_iter(*stores):
2239+ self.doc['stats'][status]['count'] += 1
2240+ self.doc['stats'][status]['bytes'] += file.size
2241+ self.doc['files'][file.name]['status'] = status
2242+ if doc is not None:
2243+ self.db.save(doc)
2244+ self.doc['files'][file.name]['id'] = doc['_id']
2245+ self.emit('progress', file.size)
2246+ self.doc['time_end'] = time.time()
2247+ finally:
2248+ self.db.save(self.doc)
2249+ self.emit('finished', self.doc['stats'])
2250+
2251+ def import_iter(self, *filestores):
2252+ common = {
2253+ 'import_id': self.id,
2254+ 'machine_id': self.env.get('machine_id'),
2255+ 'batch_id': self.env.get('batch_id'),
2256+ }
2257+ for (file, ch) in batch_import_iter(self.batch, *filestores):
2258+ if ch is None:
2259+ assert file.size == 0
2260+ yield ('empty', file, None)
2261+ continue
2262+ stored = dict(
2263+ (fs.id, {'copies': fs.copies, 'mtime': fs.stat(ch.id).mtime})
2264+ for fs in filestores
2265+ )
2266+ try:
2267+ doc = self.db.get(ch.id)
2268+ doc['stored'].update(stored)
2269+ yield ('duplicate', file, doc)
2270+ except microfiber.NotFound:
2271+ doc = schema.create_file(
2272+ ch.id, ch.file_size, ch.leaf_hashes, stored
2273+ )
2274+ doc['import'] = {
2275+ 'src': file.name,
2276+ 'mtime': file.mtime,
2277+ }
2278+ doc['import'].update(common)
2279+ doc['ctime'] = file.mtime
2280+ yield ('new', file, doc)
2281+
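Each FileStore passed to `import_iter()` contributes an entry like this to the file doc's 'stored' dict (store ID and mtime illustrative):

    'stored': {
        'MZZG2ZDSOQVSW2TEMVZG643F': {
            'copies': 1,
            'mtime': 1316689318,
        },
    }
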
2282+
2283+class ImportManager(workers.CouchManager):
2284 def __init__(self, env, callback=None):
2285- super(ImportManager, self).__init__(env, callback)
2286+ super().__init__(env, callback)
2287 self.doc = None
2288- self._total = 0
2289- self._completed = 0
2290- if not isregistered(ImportWorker):
2291- register(ImportWorker)
2292-
2293- def save(self):
2294- """
2295- Save current 'dmedia/batch' record to CouchDB.
2296- """
2297- self.db.save(self.doc)
2298+ self._reset_counters()
2299+ if not workers.isregistered(ImportWorker):
2300+ workers.register(ImportWorker)
2301+
2302+ def _reset_counters(self):
2303+ self._count = 0
2304+ self._total_count = 0
2305+ self._bytes = 0
2306+ self._total_bytes = 0
2307+
2308+ def get_worker_env(self, worker, key, args):
2309+ env = deepcopy(self.env)
2310+ env['batch_id'] = self.doc['_id']
2311+ return env
2312
2313 def first_worker_starting(self):
2314 assert self.doc is None
2315 assert self._workers == {}
2316- self._total = 0
2317- self._completed = 0
2318- self.doc = create_batch(self.env.get('machine_id'))
2319- self.save()
2320- self.emit('BatchStarted', self.doc['_id'])
2321+ self._reset_counters()
2322+ self.doc = schema.create_batch(self.env.get('machine_id'))
2323+ self.db.save(self.doc)
2324+ self.emit('batch_started', self.doc['_id'])
2325
2326 def last_worker_finished(self):
2327 assert self._workers == {}
2328 self.doc['time_end'] = time.time()
2329- self.save()
2330- self.emit('BatchFinished', self.doc['_id'],
2331- to_dbus_stats(self.doc['stats'])
2332- )
2333+ self.db.save(self.doc)
2334+ self.emit('batch_finished', self.doc['_id'], self.doc['stats'])
2335 self.doc = None
2336- log.info('Batch complete, compacting database...')
2337- self.db.post(None, '_compact')
2338-
2339- def get_worker_env(self, worker, key, args):
2340- env = dict(self.env)
2341- env['batch_id'] = self.doc['_id']
2342- return env
2343
2344 def on_error(self, key, exception, message):
2345 super(ImportManager, self).on_error(key, exception, message)
2346@@ -426,32 +180,36 @@
2347 self.doc['errors'].append(
2348 {'key': key, 'name': exception, 'msg': message}
2349 )
2350- self.save()
2351+ self.db.save(self.doc)
2352
2353 def on_started(self, key, import_id):
2354 self.doc['imports'].append(import_id)
2355- self.save()
2356- self.emit('ImportStarted', key, import_id)
2357-
2358- def on_count(self, key, import_id, total):
2359- self._total += total
2360- self.emit('ImportCount', key, import_id, total)
2361-
2362- def on_progress(self, key, import_id, completed, total, info):
2363- self._completed += 1
2364- self.emit('ImportProgress', key, import_id, completed, total, info)
2365-
2366- def on_finished(self, key, import_id, stats):
2367+ self.db.save(self.doc)
2368+ self.emit('import_started', key, import_id)
2369+
2370+ def on_scanned(self, key, total_count, total_bytes):
2371+ self._total_count += total_count
2372+ self._total_bytes += total_bytes
2373+ self.emit('batch_progress',
2374+ self._count, self._total_count,
2375+ self._bytes, self._total_bytes,
2376+ )
2377+
2378+ def on_progress(self, key, file_size):
2379+ self._count += 1
2380+ self._bytes += file_size
2381+ self.emit('batch_progress',
2382+ self._count, self._total_count,
2383+ self._bytes, self._total_bytes,
2384+ )
2385+
2386+ def on_finished(self, key, stats):
2387 accumulate_stats(self.doc['stats'], stats)
2388- self.save()
2389- self.emit('ImportFinished', key, import_id, to_dbus_stats(stats))
2390+ self.db.save(self.doc)
2391
2392 def get_batch_progress(self):
2393 with self._lock:
2394- return dict(
2395- completed=self._completed,
2396- total=self._total,
2397- )
2398+ return (self._count, self._total_count, self._bytes, self._total_bytes)
2399
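Callers unpack the batch progress as a 4-tuple, for example (manager being an ImportManager):

    (count, total_count, size, total_size) = manager.get_batch_progress()
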
2400- def start_import(self, base, extract=True):
2401- return self.start_job('ImportWorker', base, base, extract)
2402+ def start_import(self, base):
2403+ return self.start_job('ImportWorker', base, base)
2404
2405=== modified file 'dmedia/schema.py'
2406--- dmedia/schema.py 2011-09-16 04:19:18 +0000
2407+++ dmedia/schema.py 2011-09-22 11:43:29 +0000
2408@@ -54,8 +54,7 @@
2409
2410
2411 These test functions are used in the dmedia test suite, and 3rd-party apps would
2412-be well served by doing the same. Please read on for the rationale of some key
2413-dmedia schema design decisions...
2414+be well served by doing the same.
2415
2416
2417
2418@@ -77,69 +76,6 @@
2419 filename directly from a document ID is an important design consideration.
2420
2421
2422-Random IDs
2423-----------
2424-
2425-Random IDs are 120-bit random numbers, base32-encoded. They're much like a
2426-Version 4 (random) UUID, except dmedia random IDs have no reserved bits. For
2427-example:
2428-
2429->>> random_id() #doctest: +SKIP
2430-'NZXXMYLDOV2F6ZTUO5PWM5DX'
2431-
2432-
2433-Intrinsic IDs
2434--------------
2435-
2436-Files in the dmedia library are uniquely identified by their content-hash.
2437-dmedia *is* a distributed filesystem, but a quite simple one in that it only
2438-stores intrinsically-named, read-only files.
2439-
2440-The content-hash is computed as a hash-list (a 1 deep tree-hash). Currently
2441-this is done using the sha1 hash function with an 8 MiB leaf size, but dmedia
2442-is moving to Skein for the final hashing protocol.
2443-
2444-The content-hashes of the individual leaves are stored in the "leaves"
2445-attachment in the CouchDB document. This allows for file integrity checks with
2446-8 MiB granularity, and provides the basis for cryptographically robust swarm
2447-upload and download.
2448-
2449-The base32-encoded sha1 hash is 32-characters long. For example:
2450-
2451->>> from dmedia.filestore import HashList
2452->>> from dmedia.tests import sample_mov # Sample .MOV file
2453->>> src_fp = open(sample_mov, 'rb')
2454->>> hashlist = HashList(src_fp)
2455->>> hashlist.run()
2456-'TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK'
2457-
2458-
2459-After calling `HashList.run()`, the binary digests of the leaf content-hashes
2460-are available via the ``leaves`` attribute (which is a ``list``):
2461-
2462->>> from base64 import b32encode
2463->>> for d in hashlist.leaves:
2464-... print(repr(b32encode(d)))
2465-...
2466-'IXJTSUCYYFECGSG6JIB2R77CAJVJK4W3'
2467-'MA3IAHUOKXR4TRG7CWAPOO7U4WCV5WJ4'
2468-'FHF7KDMAGNYOVNYSYT6ZYWQLUOCTUADI'
2469-
2470-
2471-The overall file content-hash (aka the top-hash) is a hash of the leaf hashes.
2472-Note that this matches what was returned by `HashList.run()`:
2473-
2474->>> from hashlib import sha1
2475->>> b32encode(sha1(''.join(hashlist.leaves)).digest())
2476-'ZR765XWSF6S7JQHLUI4GCG5BHGPE252O'
2477-
2478-
2479-In the near future dmedia will very likely migrate to using a 200-bit Skein-512
2480-hash. See:
2481-
2482- http://packages.python.org/pyskein/
2483-
2484-
2485
2486 Design Decision: mime-like record types
2487 =======================================
2488@@ -310,29 +246,16 @@
2489
2490 from __future__ import print_function
2491
2492-from os import urandom
2493-from base64 import b32encode, b32decode, b64encode
2494+from base64 import b32encode, b64encode
2495 from hashlib import sha1
2496 import re
2497 import time
2498 import socket
2499-import platform
2500-
2501-from .constants import TYPE_ERROR, EXT_PAT
2502-#from .udisks import Device
2503-
2504-
2505-def random_id():
2506- """
2507- Returns a 120-bit base32-encoded random ID.
2508-
2509- The ID will be 24-characters long, URL and filesystem safe. For example:
2510-
2511- >>> random_id() #doctest: +SKIP
2512- 'OVRHK3TUOUQCWIDMNFXGC4TP'
2513-
2514- """
2515- return b32encode(urandom(15))
2516+
2517+from filestore import DIGEST_B32LEN, B32ALPHABET, TYPE_ERROR
2518+from microfiber import random_id, RANDOM_B32LEN
2519+
2520+from .constants import EXT_PAT
2521
2522
2523 # Some private helper functions that don't directly define any schema.
2524@@ -465,7 +388,7 @@
2525 >>> _isinstance('18', "doc['bytes']", int)
2526 Traceback (most recent call last):
2527 ...
2528- TypeError: doc['bytes']: need a <type 'int'>; got a <type 'str'>: '18'
2529+ TypeError: doc['bytes']: need a <class 'int'>; got a <class 'str'>: '18'
2530
2531 """
2532 for a in allowed:
2533@@ -520,7 +443,8 @@
2534 """
2535 value = _value(doc, path)
2536 label = _label(path)
2537- _isinstance(value, label, allowed)
2538+ if not (allowed is None or isinstance(value, allowed)):
2539+ raise TypeError(TYPE_ERROR.format(label, allowed, type(value), value))
2540 if value is None:
2541 return
2542 for c in checks:
2543@@ -542,7 +466,7 @@
2544 >>> _check_if_exists(doc, ['name'], str)
2545 Traceback (most recent call last):
2546 ...
2547- TypeError: doc['name']: need a <type 'str'>; got a <type 'int'>: 17
2548+ TypeError: doc['name']: need a <class 'str'>; got a <class 'int'>: 17
2549
2550
2551 See also `_check()` and `_exists()`.
2552@@ -658,79 +582,81 @@
2553 )
2554
2555
2556-def _base32(value, label):
2557- """
2558- Verify that *value* is a valid base32 encoded document ID.
2559-
2560- Document IDs must:
2561-
2562- 1. be valid base32 encoding
2563-
2564- 2. decode to data that is a multiple of 5-bytes (40-bits ) in length
2565-
2566- For example, invalid encoding:
2567-
2568- >>> _base32('MZZG2ZDS0QVSW2TEMVZG643F', "doc['_id']")
2569- Traceback (most recent call last):
2570- ...
2571- ValueError: doc['_id']: Non-base32 digit found: 'MZZG2ZDS0QVSW2TEMVZG643F'
2572-
2573- And an invalid value:
2574-
2575- >>> _base32('MFQWCYLBMFQWCYI=', "doc['_id']")
2576- Traceback (most recent call last):
2577- ...
2578- ValueError: len(b32decode(doc['_id'])) not multiple of 5: 'MFQWCYLBMFQWCYI='
2579-
2580- """
2581- try:
2582- decoded = b32decode(value)
2583- except TypeError as e:
2584- raise ValueError(
2585- '{}: {}: {!r}'.format(label, e, value)
2586- )
2587- if len(decoded) % 5 != 0:
2588- raise ValueError(
2589- 'len(b32decode({})) not multiple of 5: {!r}'.format(label, value)
2590+def _any_id(value, label):
2591+ """
2592+ Verify that *value* is a base32-encoded ID.
2593+ """
2594+ if not isinstance(value, str):
2595+ raise TypeError(
2596+ TYPE_ERROR.format(label, str, type(value), value)
2597+ )
2598+ if len(value) % 8 != 0:
2599+ raise ValueError(
2600+ '{}: length of ID ({}) not multiple of 8: {!r}'.format(
2601+ label, len(value), value)
2602+ )
2603+ if not set(value).issubset(B32ALPHABET):
2604+ raise ValueError(
2605+ '{}: ID not subset of B32ALPHABET: {!r}'.format(
2606+ label, value)
2607 )
2608
2609
2610 def _random_id(value, label):
2611 """
2612- Verify that *value* is a 120-bit base32 encoded random ID.
2613-
2614- For example:
2615-
2616- >>> _random_id('EIJ5EVPOJSO5ZBDY', "doc['_id']")
2617- Traceback (most recent call last):
2618- ...
2619- ValueError: doc['_id']: random ID must be 24 characters; got 'EIJ5EVPOJSO5ZBDY'
2620-
2621- """
2622- _base32(value, label)
2623- if len(value) != 24:
2624- raise ValueError(
2625- '{}: random ID must be 24 characters; got {!r}'.format(label, value)
2626- )
2627-
2628-
2629-def _content_id(value, label):
2630- """
2631- Verify that *value* is a 160-bit base32 encoded content hash.
2632-
2633- For example:
2634-
2635- >>> _content_id('EIJ5EVPOJSO5ZBDY', "doc['_id']")
2636- Traceback (most recent call last):
2637- ...
2638- ValueError: doc['_id']: content ID must be 32 characters; got 'EIJ5EVPOJSO5ZBDY'
2639-
2640- """
2641- _base32(value, label)
2642- if len(value) != 32:
2643- raise ValueError(
2644- '{}: content ID must be 32 characters; got {!r}'.format(label, value)
2645- )
2646+ Verify that *value* is a 120-bit base32-encoded random ID.
2647+
2648+ For example, the number ``'1'`` is not a valid base32 character:
2649+
2650+ >>> _random_id('1OTXJHVEXTKNXZHCMHDVF276', "doc['_id']")
2651+ Traceback (most recent call last):
2652+ ...
2653+ ValueError: doc['_id']: random ID not subset of B32ALPHABET: '1OTXJHVEXTKNXZHCMHDVF276'
2654+
2655+ """
2656+ if not isinstance(value, str):
2657+ raise TypeError(
2658+ TYPE_ERROR.format(label, str, type(value), value)
2659+ )
2660+ if len(value) != RANDOM_B32LEN:
2661+ raise ValueError(
2662+ '{}: random ID must be {} characters, got {}: {!r}'.format(
2663+ label, RANDOM_B32LEN, len(value), value)
2664+ )
2665+ if not set(value).issubset(B32ALPHABET):
2666+ raise ValueError(
2667+ '{}: random ID not subset of B32ALPHABET: {!r}'.format(
2668+ label, value)
2669+ )
2670+
2671+
2672+def _intrinsic_id(value, label):
2673+ """
2674+ Verify that *value* is a 240-bit base32-encoded intrinsic ID.
2675+
2676+ For example:
2677+
2678+ >>> _intrinsic_id('QE7POGENSF67FGKN2TD3FH4E', "doc['_id']")
2679+ Traceback (most recent call last):
2680+ ...
2681+ ValueError: doc['_id']: intrinsic ID must be 48 characters, got 24: 'QE7POGENSF67FGKN2TD3FH4E'
2682+
2683+ """
2684+ if not isinstance(value, str):
2685+ raise TypeError(
2686+ TYPE_ERROR.format(label, str, type(value), value)
2687+ )
2688+ if len(value) != DIGEST_B32LEN:
2689+ raise ValueError(
2690+ '{}: intrinsic ID must be {} characters, got {}: {!r}'.format(
2691+ label, DIGEST_B32LEN, len(value), value)
2692+ )
2693+ if not set(value).issubset(B32ALPHABET):
2694+ raise ValueError(
2695+ '{}: intrinsic ID not subset of B32ALPHABET: {!r}'.format(
2696+ label, value)
2697+ )
2698+
2699
2700 def _drive_id(drive):
2701 """
2702@@ -754,60 +680,28 @@
2703 """
2704 Verify that *doc* is a valid dmedia document.
2705
2706- This verifies that *doc* has the common schema requirements that all dmedia
2707- documents should have. The *doc* must:
2708-
2709- 1. Have "_id" that is base32-encoded and when decoded is a multiple
2710- of 40-bits (5 bytes)
2711-
2712- 2. Have "ver" equal to ``0``
2713-
2714- 3. Have "type" that matches ``'dmedia/[a-z]+$'``
2715-
2716- 4. Have "time" that is a ``float`` or ``int`` greater than or equal to
2717- zero
2718-
2719 For example, a conforming value:
2720
2721 >>> doc = {
2722 ... '_id': 'NZXXMYLDOV2F6ZTUO5PWM5DX',
2723 ... 'ver': 0,
2724- ... 'type': 'dmedia/file',
2725- ... 'time': 1234567890,
2726- ... }
2727- ...
2728- >>> check_dmedia(doc)
2729-
2730-
2731- And an invalid value:
2732-
2733- >>> doc = {
2734- ... '_id': 'NZXXMYLDOV2F6ZTUO5PWM5DX',
2735- ... 'ver': 0,
2736- ... 'kind': 'dmedia/file', # Changed!
2737- ... 'time': 1234567890,
2738- ... }
2739- ...
2740- >>> check_dmedia(doc)
2741- Traceback (most recent call last):
2742- ...
2743- ValueError: doc['type'] does not exist
2744+ ... 'type': 'dmedia/foo',
2745+ ... 'time': 1234567890,
2746+ ... }
2747+ ...
2748+ >>> check_dmedia(doc)
2749
2750 """
2751 _check(doc, [], dict)
2752-
2753- _check(doc, ['_id'], basestring,
2754- _base32,
2755+ _check(doc, ['_id'], None,
2756+ _any_id,
2757 )
2758-
2759 _check(doc, ['ver'], int,
2760 (_equals, 0),
2761 )
2762-
2763- _check(doc, ['type'], basestring,
2764+ _check(doc, ['type'], str,
2765 (_matches, 'dmedia/[a-z]+$'),
2766 )
2767-
2768 _check(doc, ['time'], (int, float),
2769 (_at_least, 0),
2770 )
2771@@ -820,72 +714,58 @@
2772 For example, a conforming value:
2773
2774 >>> doc = {
2775- ... '_id': 'ZR765XWSF6S7JQHLUI4GCG5BHGPE252O',
2776+ ... '_id': 'ROHNRBKS6T4YETP5JHEGQ3OLSBDBWRCKR2BKILJOA3CP7QZW',
2777+ ... '_attachments': {
2778+ ... 'leaf_hashes': {
2779+ ... 'data': 'v7t381LIyKsBCUYhkGreXx2qKTyyMfMD2eHWWp/L',
2780+ ... 'content_type': 'application/octet-stream',
2781+ ... },
2782+ ... },
2783 ... 'ver': 0,
2784 ... 'type': 'dmedia/file',
2785 ... 'time': 1234567890,
2786 ... 'bytes': 20202333,
2787- ... 'ext': 'mov',
2788 ... 'origin': 'user',
2789 ... 'stored': {
2790 ... 'MZZG2ZDSOQVSW2TEMVZG643F': {
2791 ... 'copies': 2,
2792- ... 'time': 1234567890,
2793- ... },
2794- ... },
2795- ... }
2796- ...
2797- >>> check_file(doc)
2798-
2799-
2800- And an invalid value:
2801-
2802- >>> doc = {
2803- ... '_id': 'ZR765XWSF6S7JQHLUI4GCG5BHGPE252O',
2804- ... 'ver': 0,
2805- ... 'type': 'dmedia/file',
2806- ... 'time': 1234567890,
2807- ... 'bytes': 20202333,
2808- ... 'ext': 'mov',
2809- ... 'origin': 'user',
2810- ... 'stored': {
2811- ... 'MZZG2ZDSOQVSW2TEMVZG643F': {
2812- ... 'number': 2, # Changed!
2813- ... 'time': 1234567890,
2814- ... },
2815- ... },
2816- ... }
2817- ...
2818- >>> check_file(doc)
2819- Traceback (most recent call last):
2820- ...
2821- ValueError: doc['stored']['MZZG2ZDSOQVSW2TEMVZG643F']['copies'] does not exist
2822+ ... 'mtime': 1234567890,
2823+ ... },
2824+ ... },
2825+ ... }
2826+ ...
2827+ >>> check_file(doc)
2828
2829 """
2830- check_dmedia(doc)
2831-
2832- _check(doc, ['type'], basestring,
2833+ # Common schema:
2834+ _check(doc, [], dict)
2835+ _check(doc, ['_id'], None,
2836+ _intrinsic_id,
2837+ )
2838+ _check(doc, ['ver'], int,
2839+ (_equals, 0),
2840+ )
2841+ _check(doc, ['type'], str,
2842 (_equals, 'dmedia/file'),
2843 )
2844-
2845- try:
2846- _check(doc, ['bytes'], int,
2847- (_at_least, 1),
2848- )
2849- except TypeError:
2850- _check(doc, ['bytes'], long,
2851- (_at_least, 1),
2852- )
2853-
2854- _check(doc, ['ext'], (type(None), basestring),
2855- (_matches, EXT_PAT),
2856- )
2857-
2858- _check(doc, ['origin'], basestring,
2859+ _check(doc, ['time'], (int, float),
2860+ (_at_least, 0),
2861+ )
2862+
2863+ # dmedia/file specific:
2864+ _check(doc, ['_attachments', 'leaf_hashes'], dict,
2865+ _nonempty,
2866+ )
2867+ _check(doc, ['_attachments', 'leaf_hashes', 'content_type'], str,
2868+ (_equals, 'application/octet-stream'),
2869+ )
2870+ _check(doc, ['bytes'], int,
2871+ (_at_least, 1),
2872+ )
2873+ _check(doc, ['origin'], str,
2874 _lowercase,
2875- (_is_in, 'user', 'download', 'paid', 'proxy', 'cache', 'render'),
2876+ (_is_in, 'user', 'paid', 'download', 'proxy', 'render', 'cache'),
2877 )
2878-
2879 _check(doc, ['stored'], dict,
2880 _nonempty,
2881 )
2882@@ -894,13 +774,13 @@
2883 _check(doc, ['stored', store, 'copies'], int,
2884 (_at_least, 0),
2885 )
2886- _check(doc, ['stored', store, 'time'], (int, float),
2887+ _check(doc, ['stored', store, 'mtime'], (int, float),
2888 (_at_least, 0),
2889 )
2890 _check_if_exists(doc, ['stored', store, 'verified'], (int, float),
2891 (_at_least, 0),
2892 )
2893- _check_if_exists(doc, ['stored', store, 'status'], basestring,
2894+ _check_if_exists(doc, ['stored', store, 'status'], str,
2895 (_is_in, 'partial', 'corrupt'),
2896 )
2897 _check_if_exists(doc, ['stored', store, 'corrupted'], (int, float),
2898@@ -911,22 +791,26 @@
2899
2900
2901 def check_file_optional(doc):
2902+ # 'ext' like 'mov'
2903+ _check_if_exists(doc, ['ext'], str,
2904+ (_matches, EXT_PAT),
2905+ )
2906
2907 # 'content_type' like 'video/quicktime'
2908- _check_if_exists(doc, ['content_type'], basestring)
2909+ _check_if_exists(doc, ['content_type'], str)
2910
2911 # 'content_encoding' like 'gzip'
2912- _check_if_exists(doc, ['content_encoding'], basestring,
2913+ _check_if_exists(doc, ['content_encoding'], str,
2914 (_is_in, 'gzip', 'deflate'),
2915 )
2916
2917 # 'media' like 'video'
2918- _check_if_exists(doc, ['media'], basestring,
2919+ _check_if_exists(doc, ['media'], str,
2920 (_is_in, 'video', 'audio', 'image'),
2921 )
2922
2923- # 'mtime' like 1234567890
2924- _check_if_exists(doc, ['mtime'], (int, float),
2925+ # 'ctime' like 1234567890
2926+ _check_if_exists(doc, ['ctime'], (int, float),
2927 (_at_least, 0),
2928 )
2929
2930@@ -936,11 +820,11 @@
2931 )
2932
2933 # name like 'MVI_5899.MOV'
2934- _check_if_exists(doc, ['name'], basestring)
2935+ _check_if_exists(doc, ['name'], str)
2936
2937 # dir like 'DCIM/100EOS5D2'
2938 # FIXME: Should save this as a list so path is portable
2939- _check_if_exists(doc, ['dir'], basestring)
2940+ _check_if_exists(doc, ['dir'], str)
2941
2942 # 'meta' like {'iso': 800}
2943 _check_if_exists(doc, ['meta'], dict)
2944@@ -956,52 +840,39 @@
2945 """
2946 Verify that *doc* is a valid "dmedia/store" document.
2947
2948- To be a valid 'dmedia/store' record, *doc* must:
2949-
2950- 1. conform with `check_dmedia()`
2951-
2952- 2. have 'plugin' that equal to 'filestore', 'removable_filestore',
2953- 'ubuntuone', or 's3'
2954-
2955- 3. have 'copies' that is an ``int`` >= 1
2956-
2957 For example, a conforming value:
2958
2959 >>> doc = {
2960 ... '_id': 'NZXXMYLDOV2F6ZTUO5PWM5DX',
2961 ... 'ver': 0,
2962- ... 'type': 'dmedia/file',
2963- ... 'time': 1234567890,
2964- ... 'plugin': 'filestore',
2965- ... 'copies': 2,
2966- ... }
2967- ...
2968- >>> check_store(doc)
2969-
2970-
2971- And an invalid value:
2972-
2973- >>> doc = {
2974- ... '_id': 'NZXXMYLDOV2F6ZTUO5PWM5DX',
2975- ... 'ver': 0,
2976- ... 'type': 'dmedia/file',
2977- ... 'time': 1234567890,
2978- ... 'dispatch': 'filestore',
2979- ... 'copies': 2,
2980- ... }
2981- ...
2982- >>> check_store(doc)
2983- Traceback (most recent call last):
2984- ...
2985- ValueError: doc['plugin'] does not exist
2986+ ... 'type': 'dmedia/store',
2987+ ... 'time': 1234567890,
2988+ ... 'plugin': 'filestore.local',
2989+ ... 'copies': 1,
2990+ ... }
2991+ ...
2992+ >>> check_store(doc)
2993
2994 """
2995- check_dmedia(doc)
2996-
2997- _check(doc, ['plugin'], basestring,
2998- (_is_in, 'filestore', 'removable_filestore', 'ubuntuone', 's3'),
2999- )
3000-
3001+ # Common schema:
3002+ _check(doc, [], dict)
3003+ _check(doc, ['_id'], None,
3004+ _random_id,
3005+ )
3006+ _check(doc, ['ver'], int,
3007+ (_equals, 0),
3008+ )
3009+ _check(doc, ['type'], str,
3010+ (_equals, 'dmedia/store'),
3011+ )
3012+ _check(doc, ['time'], (int, float),
3013+ (_at_least, 0),
3014+ )
3015+
3016+ # Specific to dmedia/store
3017+ _check(doc, ['plugin'], str,
3018+ (_is_in, 'filestore.local', 'filestore.removable', 'ubuntuone', 's3'),
3019+ )
3020 _check(doc, ['copies'], int,
3021 (_at_least, 0),
3022 )
3023@@ -1020,7 +891,7 @@
3024
3025 3. have 'uuid', 'fs', 'drive_id' as ``str`` strings.
3026
3027- 4. have 'label', 'partition_label' as ``unicode`` strings.
3028+ 4. have 'label', 'partition_label' as ``str`` strings.
3029
 3030 5. have 'size' as an ``int``.
3031
3032@@ -1036,8 +907,8 @@
3033 ... 'size': 1073741824,
3034 ... 'uuid': '45e8f250-b56a-11e0-aff2-0800200c9a66',
3035 ... 'fs': 'ext4',
3036- ... 'label': u'Data',
3037- ... 'partition_label': u'',
3038+ ... 'label': 'Data',
3039+ ... 'partition_label': '',
3040 ... 'drive_id': 'XBBXAIVUK4LPXJMAKCT4TEM2RDGK7HNG'
3041 ... }
3042 ...
3043@@ -1053,8 +924,8 @@
3044 ... 'time': 1234567890,
3045 ... 'size': 1073741824,
3046 ... 'uuid': '45e8f250-b56a-11e0-aff2-0800200c9a66',
3047- ... 'label': u'Data',
3048- ... 'partition_label': u'',
3049+ ... 'label': 'Data',
3050+ ... 'partition_label': '',
3051 ... 'drive_id': 'XBBXAIVUK4LPXJMAKCT4TEM2RDGK7HNG'
3052 ... }
3053 ...
3054@@ -1069,14 +940,14 @@
3055 _check_types(
3056 doc,
3057 (['uuid'], str),
3058- (['size'], int, long),
3059- (['label'], unicode),
3060- (['partition_label'], unicode),
3061+ (['size'], int),
3062+ (['label'], str),
3063+ (['partition_label'], str),
3064 (['fs'], str),
3065 (['drive_id'], str)
3066 )
3067
3068- _base32(doc['drive_id'], _label('drive_id'))
3069+ _any_id(doc['drive_id'], _label('drive_id'))
3070
3071
3072 def check_drive(doc):
3073@@ -1092,7 +963,7 @@
3074
3075 3. have 'serial', 'wwn', 'revision' as ``str`` strings.
3076
3077- 4. have 'vendor', 'model' as ``unicode`` strings.
3078+ 4. have 'vendor', 'model' as ``str`` strings.
3079
3080 For example, a conforming value:
3081
3082@@ -1104,8 +975,8 @@
3083 ... 'serial': 'A0000001B900',
3084 ... 'wwn': '50014ee0016eb572',
3085 ... 'revision': '1.95',
3086- ... 'vendor': u'Canon',
3087- ... 'model': u'EOS 7D'
3088+ ... 'vendor': 'Canon',
3089+ ... 'model': 'EOS 7D'
3090 ... }
3091 ...
3092 >>> check_drive(doc)
3093@@ -1121,7 +992,7 @@
3094 ... 'serial': 'A0000001B900',
3095 ... 'wwn': '50014ee0016eb572',
3096 ... 'revision': '1.95',
3097- ... 'vendor': u'Canon'
3098+ ... 'vendor': 'Canon'
3099 ... }
3100 ...
3101 >>> check_drive(doc)
3102@@ -1136,8 +1007,8 @@
3103 doc,
3104 (['serial'], str),
3105 (['wwn'], str),
3106- (['vendor'], unicode),
3107- (['model'], unicode),
3108+ (['vendor'], str),
3109+ (['model'], str),
3110 (['revision'], str)
3111 )
3112
3113@@ -1145,36 +1016,22 @@
3114 #######################################################
3115 # Functions for creating specific types of dmedia docs:
3116
3117-def create_file(_id, file_size, leaf_hashes, stored, ext=None, origin='user'):
3118+def create_file(_id, file_size, leaf_hashes, stored, origin='user'):
3119 """
3120 Create a minimal 'dmedia/file' document.
3121-
3122- :param _id: the content hash, eg ``'JK47OD6N5JYFGEIFB53LX7XPUSYCWDUM'``
3123- :param file_size: an ``int``, the file size in bytes, eg ``20202333``
3124- :param leaf_hashes: a ``bytes`` instance containing the concatenated content
3125- hashes of the leaves
3126- :param stored: a ``dict`` containing locations this file is stored
3127- ``'Y4J3WQCMKV5GHATOCZZBHF4Y'``
3128- :param ext: the file extension, eg ``'mov'``; default is ``None``
3129- :param origin: the file's origin (for durability/reclamation purposes);
3130- default is ``'user'``
3131 """
3132- ts = time.time()
3133- for value in stored.values():
3134- value['time'] = ts
3135 return {
3136 '_id': _id,
3137 '_attachments': {
3138- 'leaves': {
3139- 'data': b64encode(leaf_hashes),
3140+ 'leaf_hashes': {
3141+ 'data': b64encode(leaf_hashes).decode('utf-8'),
3142 'content_type': 'application/octet-stream',
3143 }
3144 },
3145 'ver': 0,
3146 'type': 'dmedia/file',
3147- 'time': ts,
3148+ 'time': time.time(),
3149 'bytes': file_size,
3150- 'ext': ext,
3151 'origin': origin,
3152 'stored': stored,
3153 }
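For example, pairing `create_file()` with a `filestore.ContentHash` as `import_iter()` does (store ID and mtime illustrative):

    stored = {'MZZG2ZDSOQVSW2TEMVZG643F': {'copies': 1, 'mtime': 1316689318}}
    doc = create_file(ch.id, ch.file_size, ch.leaf_hashes, stored)
    assert doc['_id'] == ch.id
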
3154@@ -1190,34 +1047,39 @@
3155 'type': 'dmedia/machine',
3156 'time': time.time(),
3157 'hostname': socket.gethostname(),
3158- 'distribution': list(platform.linux_distribution()),
3159 }
3160
3161
3162 def create_store(parentdir, machine_id, copies=1):
3163 """
3164- Create a 'dmedia/store' document.
3165+ Create a 'dmedia/store' doc for a FileStore on a non-removable drive.
3166 """
3167- # FIXME: We're going to have have the drive and partition information passed
3168- # to schema.py "from the outside" as to abstract whether the info comes from
3169- # udisks or the equivalent on other platforms.
3170- #try:
3171- # makedirs(parentdir)
3172- #except:
3173- # pass
3174- #p = Device(path=parentdir)
3175- #uuid = str(p['IdUuid'])
3176 return {
3177 '_id': random_id(),
3178 'ver': 0,
3179 'type': 'dmedia/store',
3180 'time': time.time(),
3181- 'plugin': 'filestore',
3182- 'copies': copies,
3183- 'path': parentdir,
3184+ 'plugin': 'filestore.local',
3185+ 'parentdir': parentdir,
3186 'machine_id': machine_id,
3187- #'partition_id': b32encode(sha1(uuid).digest())
3188- }
3189+ 'copies': copies,
3190+ }
3191+
3192+
3193+def create_removable_store(copies=1, **kw):
3194+ """
3195+ Create a 'dmedia/store' document.
3196+ """
3197+ doc = {
3198+ '_id': random_id(),
3199+ 'ver': 0,
3200+ 'type': 'dmedia/store',
3201+ 'time': time.time(),
3202+ 'plugin': 'filestore.removable',
3203+ 'copies': copies,
3204+ }
3205+ doc.update(kw)
3206+ return doc
3207
3208
3209 def create_s3_store(bucket, copies=2, use_ext=True):
3210@@ -1249,41 +1111,36 @@
3211 'imports': [],
3212 'errors': [],
3213 'stats': {
3214- 'considered': {'count': 0, 'bytes': 0},
3215- 'imported': {'count': 0, 'bytes': 0},
3216- 'skipped': {'count': 0, 'bytes': 0},
3217+ 'total': {'count': 0, 'bytes': 0},
3218+ 'new': {'count': 0, 'bytes': 0},
3219+ 'duplicate': {'count': 0, 'bytes': 0},
3220 'empty': {'count': 0, 'bytes': 0},
3221- 'error': {'count': 0, 'bytes': 0},
3222- }
3223+ },
3224 }
3225
3226
3227-def create_import(base, partition_id, batch_id=None, machine_id=None):
3228+def create_import(basedir, machine_id, **kw):
3229 """
3230 Create initial 'dmedia/import' accounting document.
3231 """
3232- return {
3233+ doc = {
3234 '_id': random_id(),
3235 'ver': 0,
3236 'type': 'dmedia/import',
3237 'time': time.time(),
3238- 'batch_id': batch_id,
3239+ 'basedir': basedir,
3240 'machine_id': machine_id,
3241- 'partition_id': partition_id,
3242- 'base': base,
3243- 'log': {
3244- 'imported': [],
3245- 'skipped': [],
3246- 'empty': [],
3247- 'error': [],
3248- },
3249+ 'files': {},
3250+ 'import_order': [],
3251 'stats': {
3252- 'imported': {'count': 0, 'bytes': 0},
3253- 'skipped': {'count': 0, 'bytes': 0},
3254+ 'total': {'count': 0, 'bytes': 0},
3255+ 'new': {'count': 0, 'bytes': 0},
3256+ 'duplicate': {'count': 0, 'bytes': 0},
3257 'empty': {'count': 0, 'bytes': 0},
3258- 'error': {'count': 0, 'bytes': 0},
3259- }
3260+ },
3261 }
3262+ doc.update(kw)
3263+ return doc
3264
3265
3266 def create_partition(base):
3267@@ -1300,8 +1157,8 @@
3268 'time': time.time(),
3269 'uuid': uuid,
3270 'size': int(p['DeviceSize']),
3271- 'label': unicode(p['IdLabel']),
3272- 'partition_label': unicode(p['PartitionLabel']),
3273+ 'label': str(p['IdLabel']),
3274+ 'partition_label': str(p['PartitionLabel']),
3275 'fs': str(p['IdType']),
3276 'drive_id': _drive_id(d)
3277 }
3278@@ -1320,7 +1177,7 @@
3279 'time': time.time(),
3280 'serial': str(d['DriveSerial']),
3281 'wwn': str(d['DriveWwn']),
3282- 'vendor': unicode(d['DriveVendor']),
3283- 'model': unicode(d['DriveModel']),
3284+ 'vendor': str(d['DriveVendor']),
3285+ 'model': str(d['DriveModel']),
3286 'revision': str(d['DriveRevision'])
3287 }
3288
3289=== added file 'dmedia/tests/base.py'
3290--- dmedia/tests/base.py 1970-01-01 00:00:00 +0000
3291+++ dmedia/tests/base.py 2011-09-22 11:43:29 +0000
3292@@ -0,0 +1,164 @@
3293+# Authors:
3294+# Jason Gerard DeRose <jderose@novacut.com>
3295+#
3296+# dmedia: distributed media library
3297+# Copyright (C) 2011 Jason Gerard DeRose <jderose@novacut.com>
3298+#
3299+# This file is part of `dmedia`.
3300+#
3301+# `dmedia` is free software: you can redistribute it and/or modify it under the
3302+# terms of the GNU Affero General Public License as published by the Free
3303+# Software Foundation, either version 3 of the License, or (at your option) any
3304+# later version.
3305+#
3306+# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
3307+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
3308+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
3309+# details.
3310+#
3311+# You should have received a copy of the GNU Affero General Public License along
3312+# with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
3313+
3314+"""
3315+Useful TestCase subclasses.
3316+"""
3317+
3318+from unittest import TestCase
3319+import os
3320+from os import path
3321+from base64 import b64decode
3323+import tempfile
3324+import shutil
3325+from random import SystemRandom
3326+
3327+from filestore import File, Leaf, ContentHash, Batch, Hasher, LEAF_SIZE
3328+from microfiber import random_id
3329+
3330+
3331+datadir = path.join(path.dirname(path.abspath(__file__)), 'data')
3332+random = SystemRandom()
3333+
3334+
3335+class SampleFilesTestCase(TestCase):
3336+ """
3337+ Base class for tests that use the files in dmedia/tests/data.
3338+
3339+ If the MVI_5751.MOV or MVI_5751.THM file isn't present, self.skipTest() is
3340+ called. This will allow us to stop shipping the 20MB video file in the
3341+ dmedia release tarballs.
3342+ """
3343+
3344+ mov = path.join(datadir, 'MVI_5751.MOV')
3345+ thm = path.join(datadir, 'MVI_5751.THM')
3346+ mov_ch = ContentHash(
3347+ 'SM3GS4DUDVXOEU2DTTTWU5HKNRK777IWNSI5UQ4ZWNQGRXAN',
3348+ 20202333,
3349+ b64decode(b''.join([
3350+ b'Ps9ZlZ5RALOGrqUbXJYJDFJaLClkGKkv4gYu2cWn',
3351+ b'dse+QPUQFn9Q6FBkhhX0hjDGHOyMnFtGdAgRY1Gc',
3352+ b'XzjjVS002vjsMkVKb4/+E7qmeGfHsBFFbYV127ux'
3353+ ]))
3354+ )
3355+ thm_ch = ContentHash(
3356+ 'MXPCFNUNPDAWHQWC5QNTPP2U5OF2J267QQVALXX6B5TRJKJB',
3357+ 27328,
3358+ b64decode(b'RwtCvXTjDrah3O23qNkobCGNF6hq7HYIB4TRx2Dh'),
3359+ )
3360+
3361+ def setUp(self):
3362+ for filename in (self.mov, self.thm):
3363+ if not path.isfile(filename):
3364+ self.skipTest('Missing file {!r}'.format(filename))
3365+
3366+
3367+def random_leaves(file_size):
3368+ index = 0
3369+ for full in range(file_size // LEAF_SIZE):
3370+ data = os.urandom(16) * (LEAF_SIZE // 16)
3371+ yield Leaf(index, data)
3372+ index += 1
3373+ partial = file_size % LEAF_SIZE
3374+ if partial:
3375+ data = os.urandom(1) * partial
3376+ yield Leaf(index, data)
3377+
3378+
3379+def random_file(tmpdir, max_size):
3380+ filename = path.join(tmpdir, random_id())
3381+ file_size = random.randint(1, max_size)
3382+ dst_fp = open(filename, 'wb')
3383+ h = Hasher()
3384+ for leaf in random_leaves(file_size):
3385+ h.hash_leaf(leaf)
3386+ dst_fp.write(leaf.data)
3387+ dst_fp.close()
3388+ st = os.stat(filename)
3389+ file = File(filename, st.st_size, st.st_mtime)
3390+ assert file.size == file_size
3391+ return (file, h.content_hash())
3392+
3393+
3394+def random_empty(tmpdir):
3395+ filename = path.join(tmpdir, random_id())
3396+ open(filename, 'wb').close()
3397+ st = os.stat(filename)
3398+ file = File(filename, st.st_size, st.st_mtime)
3399+ assert file.size == 0
3400+ return (file, None)
3401+
3402+
3403+class TempDir(object):
3404+ def __init__(self):
3405+ self.dir = tempfile.mkdtemp(prefix='unittest.')
3406+
3407+ def __del__(self):
3408+ self.rmtree()
3409+
3410+ def rmtree(self):
3411+ if self.dir is not None:
3412+ shutil.rmtree(self.dir)
3413+ self.dir = None
3414+
3415+ def join(self, *parts):
3416+ return path.join(self.dir, *parts)
3417+
3418+ def makedirs(self, *parts):
3419+ d = self.join(*parts)
3420+ if not path.exists(d):
3421+ os.makedirs(d)
3422+ assert path.isdir(d), d
3423+ return d
3424+
3425+ def touch(self, *parts):
3426+ self.makedirs(*parts[:-1])
3427+ f = self.join(*parts)
3428+ open(f, 'wb').close()
3429+ return f
3430+
3431+ def write(self, data, *parts):
3432+ self.makedirs(*parts[:-1])
3433+ f = self.join(*parts)
3434+ open(f, 'wb').write(data)
3435+ return f
3436+
3437+ def copy(self, src, *parts):
3438+ self.makedirs(*parts[:-1])
3439+ dst = self.join(*parts)
3440+ shutil.copy(src, dst)
3441+ return dst
3442+
3443+ def random_batch(self, count, empties=0, max_size=LEAF_SIZE*4):
3444+ result = list(self.random_file(max_size) for i in range(count))
3445+ result.extend(self.random_empty() for i in range(empties))
3446+ result.sort(key=lambda tup: tup[0].name)
3447+ files = tuple(file for (file, ch) in result)
3448+ batch = Batch(files, sum(f.size for f in files), len(files))
3449+ return (batch, result)
3450+
3451+ def random_file(self, max_size=LEAF_SIZE*4):
3452+ return random_file(self.dir, max_size)
3453+
3454+ def random_empty(self):
3455+ return random_empty(self.dir)
3456+
3457
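Typical use in a test looks like this (a sketch):

    tmp = TempDir()
    (batch, result) = tmp.random_batch(10, empties=2)
    assert batch.count == 12  # empties are included in the Batch
    for (file, ch) in result:
        pass  # ch is a ContentHash, or None for an empty file
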
3458=== modified file 'dmedia/tests/couch.py'
3459--- dmedia/tests/couch.py 2011-09-15 11:41:48 +0000
3460+++ dmedia/tests/couch.py 2011-09-22 11:43:29 +0000
3461@@ -30,7 +30,7 @@
3462 from subprocess import Popen
3463 import time
3464 import socket
3465-from hashlib import sha1
3466+from hashlib import sha1, md5
3467 from base64 import b32encode
3468 import shutil
3469 from copy import deepcopy
3470@@ -38,8 +38,6 @@
3471 import microfiber
3472 from microfiber import random_id
3473
3474-from .helpers import TempHome
3475-
3476
3477 SOCKET_OPTIONS = '[{recbuf, 262144}, {sndbuf, 262144}, {nodelay, true}]'
3478
3479@@ -93,7 +91,7 @@
3480
3481
3482 def random_key():
3483- return b32encode(os.urandom(10))
3484+ return b32encode(os.urandom(10)).decode('utf-8')
3485
3486
3487 def random_oauth():
3488@@ -122,11 +120,12 @@
3489
3490
3491 def random_salt():
3492- return os.urandom(16).encode('hex')
3493+ return md5(os.urandom(16)).hexdigest()
3494
3495
3496 def couch_hashed(password, salt):
3497- hexdigest = sha1(password + salt).hexdigest()
3498+ data = (password + salt).encode('utf-8')
3499+ hexdigest = sha1(data).hexdigest()
3500 return '-hashed-{},{}'.format(hexdigest, salt)
3501
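This yields the hashed form CouchDB expects, for example:

    salt = random_salt()
    hashed = couch_hashed('secret', salt)
    # hashed == '-hashed-' + sha1(('secret' + salt).encode('utf-8')).hexdigest() + ',' + salt
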
3502
3503@@ -240,13 +239,11 @@
3504 def setUp(self):
3505 self.tmpcouch = TempCouch()
3506 self.env = self.tmpcouch.bootstrap()
3507- self.home = TempHome()
3508 self.machine_id = random_id()
3509 self.env['machine_id'] = self.machine_id
3510- self.env['filestore'] = {'_id': random_id(), 'path': self.home.path}
3511
3512 def tearDown(self):
3513 self.tmpcouch.kill()
3514 self.tmpcouch = None
3515- self.home = None
3516 self.env = None
3517+ self.machine_id = None
3518
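For anyone unfamiliar with the CouchDB side: couch_hashed() above follows the CouchDB 1.x '-hashed-' password scheme, sha1 over the UTF-8 bytes of password+salt. A worked example with a made-up salt:

    from hashlib import sha1

    salt = 'aa' * 16  # 32 hex chars, same shape as random_salt() returns
    data = ('secret' + salt).encode('utf-8')
    print('-hashed-{},{}'.format(sha1(data).hexdigest(), salt))
    # -> '-hashed-<40 hex chars>,aaaaaaaa...'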
3519=== modified file 'dmedia/tests/helpers.py'
3520--- dmedia/tests/helpers.py 2011-04-06 20:56:54 +0000
3521+++ dmedia/tests/helpers.py 2011-09-22 11:43:29 +0000
3522@@ -36,19 +36,19 @@
3523 mov_hash = 'TGX33XXWU3EVHEEY5J7NBOJGKBFXLEBK'
3524 mov_size = 20202333
3525 mov_leaves = [
3526- b32decode('IXJTSUCYYFECGSG6JIB2R77CAJVJK4W3'),
3527- b32decode('MA3IAHUOKXR4TRG7CWAPOO7U4WCV5WJ4'),
3528- b32decode('FHF7KDMAGNYOVNYSYT6ZYWQLUOCTUADI'),
3529+ b32decode(b'IXJTSUCYYFECGSG6JIB2R77CAJVJK4W3'),
3530+ b32decode(b'MA3IAHUOKXR4TRG7CWAPOO7U4WCV5WJ4'),
3531+ b32decode(b'FHF7KDMAGNYOVNYSYT6ZYWQLUOCTUADI'),
3532 ]
3533 mov_att = {
3534- 'data': b64encode(''.join(mov_leaves)),
3535+ 'data': b64encode(b''.join(mov_leaves)),
3536 'content_type': 'application/octet-stream',
3537 }
3538
3539 mov_qid = 'GJ4AQP3BK3DMTXYOLKDK6CW4QIJJGVMN'
3540
3541 thm_hash = 'GKZMOPVZILR43MZCXLVYP7T62XGBT7BQ'
3542-thm_leaves = [b32decode('F6ATTKI6YVWVRBQQESAZ4DSUXQ4G457A')]
3543+thm_leaves = [b32decode(b'F6ATTKI6YVWVRBQQESAZ4DSUXQ4G457A')]
3544 thm_qid = 'EYCDXXCNDB6OIIX5DN74J7KEXLNCQD5M'
3545
3546
3547@@ -64,28 +64,25 @@
3548 return (src1, src2, dup1)
3549
3550
3551-class ExceptionNotRaised(StandardError):
3552+class ExceptionNotRaised(Exception):
3553 """
3554 Raised when an expected exception is not raised.
3555 """
3556
3557 def __init__(self, expected):
3558 self.expected = expected
3559- StandardError.__init__(self, 'expected %s' % expected.__name__)
3560+ Exception.__init__(self, 'expected %s' % expected.__name__)
3561
3562
3563 def raises(exception, callback, *args, **kw):
3564 """
3565 Test that ``exception`` is raised when ``callback`` is called.
3566 """
3567- raised = False
3568 try:
3569 callback(*args, **kw)
3570- except exception, e:
3571- raised = True
3572- if not raised:
3573- raise ExceptionNotRaised(exception)
3574- return e
3575+ except exception as e:
3576+ return e
3577+ raise ExceptionNotRaised(exception)
3578
3579
3580 class TempDir(object):
3581
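The ported raises() now returns the exception instance directly, so typical use looks like this (illustrative only; new tests mostly use assertRaises as a context manager instead):

    def divide(a, b):
        return a / b

    e = raises(ZeroDivisionError, divide, 1, 0)
    print(e)  # division by zero

    # and it fails loudly when the expected exception never fires:
    raises(ValueError, divide, 4, 2)  # raises ExceptionNotRaised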
3582=== added file 'dmedia/tests/test_client.py'
3583--- dmedia/tests/test_client.py 1970-01-01 00:00:00 +0000
3584+++ dmedia/tests/test_client.py 2011-09-22 11:43:29 +0000
3585@@ -0,0 +1,259 @@
3585+# dmedia: distributed media library
3587+# Copyright (C) 2011 Novacut Inc
3588+#
3589+# This file is part of `dmedia`.
3590+#
3591+# `dmedia` is free software: you can redistribute it and/or modify it under
3592+# the terms of the GNU Affero General Public License as published by the Free
3593+# Software Foundation, either version 3 of the License, or (at your option) any
3594+# later version.
3595+#
3596+# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
3597+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
3598+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
3599+# details.
3600+#
3601+# You should have received a copy of the GNU Affero General Public License along
3602+# with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
3603+#
3604+# Authors:
3605+# Jason Gerard DeRose <jderose@novacut.com>
3606+
3607+"""
3608+Unit tests for `dmedia.client`.
3609+"""
3610+
3611+from unittest import TestCase
3612+import os
3613+from http.client import HTTPConnection, HTTPSConnection
3614+
3615+from microfiber import random_id
3616+from filestore import ContentHash, TYPE_ERROR, DIGEST_BYTES
3617+
3618+from dmedia import client
3619+
3620+
3621+class FakeResponse:
3622+ def __init__(self, status, reason):
3623+ self.status = status
3624+ self.reason = reason
3625+ self._data = os.urandom(16)
3626+
3627+ def read(self):
3628+ return self._data
3629+
3630+
3631+class TestErrors(TestCase):
3632+ def test_errors(self):
3633+ self.assertEqual(
3634+ client.errors,
3635+ {
3636+ 400: client.BadRequest,
3637+ 401: client.Unauthorized,
3638+ 403: client.Forbidden,
3639+ 404: client.NotFound,
3640+ 405: client.MethodNotAllowed,
3641+ 406: client.NotAcceptable,
3642+ 409: client.Conflict,
3643+ 412: client.PreconditionFailed,
3644+ 415: client.BadContentType,
3645+ 416: client.BadRangeRequest,
3646+ 417: client.ExpectationFailed,
3647+ }
3648+ )
3649+ method = 'MOST'
3650+ path = '/restful?and=awesome'
3651+ for (status, klass) in client.errors.items():
3652+ reason = random_id()
3653+ r = FakeResponse(status, reason)
3654+ inst = klass(r, method, path)
3655+ self.assertIs(inst.response, r)
3656+ self.assertEqual(inst.method, method)
3657+ self.assertEqual(inst.path, path)
3658+ self.assertEqual(inst.data, r._data)
3659+ self.assertEqual(
3660+ str(inst),
3661+ '{} {}: {} {}'.format(status, reason, method, path)
3662+ )
3663+
3664+
3665+class TestFunctions(TestCase):
3666+ def test_http_conn(self):
3667+ f = client.http_conn
3668+
3669+ # Test with bad scheme
3670+ with self.assertRaises(ValueError) as cm:
3671+ (conn, t) = f('ftp://foo.s3.amazonaws.com/')
3672+ self.assertEqual(
3673+ str(cm.exception),
3674+ "url scheme must be http or https: 'ftp://foo.s3.amazonaws.com/'"
3675+ )
3676+
3677+ # Test with bad url
3678+ with self.assertRaises(ValueError) as cm:
3679+ (inst, t) = f('http:foo.s3.amazonaws.com/')
3680+ self.assertEqual(
3681+ str(cm.exception),
3682+ "bad url: 'http:foo.s3.amazonaws.com/'"
3683+ )
3684+
3685+ # Test with HTTP
3686+ (conn, t) = f('http://foo.s3.amazonaws.com/')
3687+ self.assertIsInstance(conn, HTTPConnection)
3688+ self.assertNotIsInstance(conn, HTTPSConnection)
3689+ self.assertEqual(t, ('http', 'foo.s3.amazonaws.com', '/', '', '', ''))
3690+
3691+ # Test with HTTPS
3692+ (conn, t) = f('https://foo.s3.amazonaws.com/')
3693+ self.assertIsInstance(conn, HTTPSConnection)
3694+ self.assertEqual(t, ('https', 'foo.s3.amazonaws.com', '/', '', '', ''))
3695+
3696+ def test_bytes_range(self):
3697+ f = client.bytes_range
3698+ self.assertEqual(f(0, 500), 'bytes=0-499')
3699+ self.assertEqual(f(500, 1000), 'bytes=500-999')
3700+ self.assertEqual(f(-500), 'bytes=-500')
3701+ self.assertEqual(f(9500), 'bytes=9500-')
3702+
3703+ def test_check_slice(self):
3704+ ch = ContentHash('foo', None, (1, 2, 3))
3705+
3706+ # Test all valid slices
3707+ client.check_slice(ch, 0, None)
3708+ client.check_slice(ch, 1, None)
3709+ client.check_slice(ch, 2, None)
3710+ client.check_slice(ch, 0, 1)
3711+ client.check_slice(ch, 0, 2)
3712+ client.check_slice(ch, 1, 2)
3713+ client.check_slice(ch, 0, 3)
3714+ client.check_slice(ch, 1, 3)
3715+ client.check_slice(ch, 2, 3)
3716+
3717+ # ch type
3718+ with self.assertRaises(TypeError) as cm:
3719+ bad = ('foo', None, (1, 2, 3))
3720+ client.check_slice(bad, 1, None)
3721+ self.assertEqual(
3722+ str(cm.exception),
3723+ TYPE_ERROR.format('ch', ContentHash, tuple, bad)
3724+ )
3725+
3726+ # ch.leaf_hashes type
3727+ with self.assertRaises(TypeError) as cm:
3728+ bad = ContentHash('foo', None, os.urandom(DIGEST_BYTES))
3729+ client.check_slice(bad, 1, None)
3730+ self.assertEqual(
3731+ str(cm.exception),
3732+ 'ch.leaf_hashes not unpacked for ch.id=foo'
3733+ )
3734+
3735+ # empty ch.leaf_hashes
3736+ with self.assertRaises(ValueError) as cm:
3737+ bad = ContentHash('foo', None, tuple())
3738+ client.check_slice(bad, 1, None)
3739+ self.assertEqual(
3740+ str(cm.exception),
3741+ 'got empty ch.leaf_hashes for ch.id=foo'
3742+ )
3743+
3744+ # start type
3745+ with self.assertRaises(TypeError) as cm:
3746+ client.check_slice(ch, 0.0, None)
3747+ self.assertEqual(
3748+ str(cm.exception),
3749+ TYPE_ERROR.format('start', int, float, 0.0)
3750+ )
3751+
3752+ # stop type
3753+ with self.assertRaises(TypeError) as cm:
3754+ client.check_slice(ch, 0, 1.0)
3755+ self.assertEqual(
3756+ str(cm.exception),
3757+ TYPE_ERROR.format('stop', int, float, 1.0)
3758+ )
3759+
3760+ # start value
3761+ with self.assertRaises(ValueError) as cm:
3762+ client.check_slice(ch, -1, None)
3763+ self.assertEqual(
3764+ str(cm.exception),
3765+ 'Need 0 <= start < 3; got start=-1'
3766+ )
3767+ with self.assertRaises(ValueError) as cm:
3768+ client.check_slice(ch, 3, None)
3769+ self.assertEqual(
3770+ str(cm.exception),
3771+ 'Need 0 <= start < 3; got start=3'
3772+ )
3773+
3774+ # stop value
3775+ with self.assertRaises(ValueError) as cm:
3776+ client.check_slice(ch, 0, 0)
3777+ self.assertEqual(
3778+ str(cm.exception),
3779+ 'Need 1 <= stop <= 3; got stop=0'
3780+ )
3781+ with self.assertRaises(ValueError) as cm:
3782+ client.check_slice(ch, 0, 4)
3783+ self.assertEqual(
3784+ str(cm.exception),
3785+ 'Need 1 <= stop <= 3; got stop=4'
3786+ )
3787+
3788+ # start < stop
3789+ with self.assertRaises(ValueError) as cm:
3790+ client.check_slice(ch, 2, 1)
3791+ self.assertEqual(
3792+ str(cm.exception),
3793+ 'Need start < stop; got start=2, stop=1'
3794+ )
3795+ with self.assertRaises(ValueError) as cm:
3796+ client.check_slice(ch, 1, 1)
3797+ self.assertEqual(
3798+ str(cm.exception),
3799+ 'Need start < stop; got start=1, stop=1'
3800+ )
3801+
3802+
3803+class TestHTTPClient(TestCase):
3804+ def test_init(self):
3805+ bad = 'sftp://localhost:5984/'
3806+ with self.assertRaises(ValueError) as cm:
3807+ inst = client.HTTPClient(bad)
3808+ self.assertEqual(
3809+ str(cm.exception),
3810+ 'url scheme must be http or https: {!r}'.format(bad)
3811+ )
3812+ bad = 'http:localhost:5984/foo/bar'
3813+ with self.assertRaises(ValueError) as cm:
3814+ inst = client.HTTPClient(bad)
3815+ self.assertEqual(
3816+ str(cm.exception),
3817+ 'bad url: {!r}'.format(bad)
3818+ )
3819+
3820+ inst = client.HTTPClient('https://localhost:5984/couch?foo=bar/')
3821+ self.assertEqual(inst.url, 'https://localhost:5984/couch/')
3822+ self.assertEqual(inst.basepath, '/couch/')
3823+ self.assertIsInstance(inst.conn, HTTPSConnection)
3824+
3825+ inst = client.HTTPClient('http://localhost:5984?/')
3826+ self.assertEqual(inst.url, 'http://localhost:5984/')
3827+ self.assertEqual(inst.basepath, '/')
3828+ self.assertIsInstance(inst.conn, HTTPConnection)
3829+
3830+ inst = client.HTTPClient('http://localhost:5001/')
3831+ self.assertEqual(inst.url, 'http://localhost:5001/')
3832+ self.assertIsInstance(inst.conn, HTTPConnection)
3833+
3834+ inst = client.HTTPClient('http://localhost:5002')
3835+ self.assertEqual(inst.url, 'http://localhost:5002/')
3836+ self.assertIsInstance(inst.conn, HTTPConnection)
3837+
3838+ inst = client.HTTPClient('https://localhost:5003/')
3839+ self.assertEqual(inst.url, 'https://localhost:5003/')
3840+ self.assertIsInstance(inst.conn, HTTPSConnection)
3841+
3842+ inst = client.HTTPClient('https://localhost:5004')
3843+ self.assertEqual(inst.url, 'https://localhost:5004/')
3844+ self.assertIsInstance(inst.conn, HTTPSConnection)
3845
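The check_slice() tests above pin down the validation rules completely; pieced together from the expected error messages, the function has to behave roughly like this (a reconstruction for readers, not the actual dmedia/client.py source):

    from filestore import ContentHash, TYPE_ERROR

    def check_slice(ch, start, stop):
        if not isinstance(ch, ContentHash):
            raise TypeError(TYPE_ERROR.format('ch', ContentHash, type(ch), ch))
        if not isinstance(ch.leaf_hashes, tuple):
            raise TypeError(
                'ch.leaf_hashes not unpacked for ch.id={}'.format(ch.id)
            )
        if not ch.leaf_hashes:
            raise ValueError(
                'got empty ch.leaf_hashes for ch.id={}'.format(ch.id)
            )
        if not isinstance(start, int):
            raise TypeError(TYPE_ERROR.format('start', int, type(start), start))
        if not (stop is None or isinstance(stop, int)):
            raise TypeError(TYPE_ERROR.format('stop', int, type(stop), stop))
        count = len(ch.leaf_hashes)
        if not (0 <= start < count):
            raise ValueError(
                'Need 0 <= start < {}; got start={}'.format(count, start)
            )
        if stop is not None:
            if not (1 <= stop <= count):
                raise ValueError(
                    'Need 1 <= stop <= {}; got stop={}'.format(count, stop)
                )
            if start >= stop:
                raise ValueError(
                    'Need start < stop; got start={}, stop={}'.format(start, stop)
                )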
3846=== modified file 'dmedia/tests/test_core.py'
3847--- dmedia/tests/test_core.py 2011-09-16 05:03:51 +0000
3848+++ dmedia/tests/test_core.py 2011-09-22 11:43:29 +0000
3849@@ -30,18 +30,27 @@
3850 from os import path
3851
3852 import microfiber
3853+from filestore import FileStore, DIGEST_BYTES
3854
3855 from dmedia.schema import random_id, check_store
3856-from dmedia.filestore import FileStore
3857 from dmedia import core
3858
3859-from .helpers import TempDir, mov_hash, sample_mov
3860+from .helpers import TempHome
3861 from .couch import CouchCase
3862+from .base import TempDir
3863
3864
3865 class TestCore(CouchCase):
3866 klass = core.Core
3867
3868+ def setUp(self):
3869+ super().setUp()
3870+ self.home = TempHome()
3871+
3872+ def tearDown(self):
3873+ super().tearDown()
3874+ self.home = None
3875+
3876 def test_init(self):
3877 inst = self.klass(self.env)
3878 self.assertIs(inst.env, self.env)
3879@@ -84,7 +93,6 @@
3880 'type',
3881 'time',
3882 'hostname',
3883- 'distribution',
3884 ])
3885 )
3886 self.assertEqual(machine, inst.db.get(local['machine']['_id']))
3887@@ -142,16 +150,16 @@
3888 'time',
3889 'plugin',
3890 'copies',
3891- 'path',
3892+ 'parentdir',
3893 'machine_id',
3894 #'partition_id',
3895 ])
3896 )
3897 self.assertEqual(lstore['ver'], 0)
3898 self.assertEqual(lstore['type'], 'dmedia/store')
3899- self.assertEqual(lstore['plugin'], 'filestore')
3900+ self.assertEqual(lstore['plugin'], 'filestore.local')
3901 self.assertEqual(lstore['copies'], 1)
3902- self.assertEqual(lstore['path'], self.home.path)
3903+ self.assertEqual(lstore['parentdir'], self.home.path)
3904 self.assertEqual(lstore['machine_id'], inst.machine_id)
3905
3906 store = inst.db.get(_id)
3907@@ -198,12 +206,12 @@
3908 self.assertEqual(set(inst._filestores), set([okay]))
3909 fs = inst._filestores[okay]
3910 self.assertIsInstance(fs, FileStore)
3911- self.assertEqual(fs.parent, okay)
3912+ self.assertEqual(fs.parentdir, okay)
3913
3914 # Test the doc
3915 check_store(store)
3916 self.assertEqual(inst.db.get(store['_id']), store)
3917- self.assertEqual(store['path'], okay)
3918+ self.assertEqual(store['parentdir'], okay)
3919 self.assertTrue(store.pop('_rev').startswith('1-'))
3920 self.assertEqual(list(inst.local['filestores']), [okay])
3921 self.assertEqual(inst.local['filestores'][okay], store)
3922@@ -217,26 +225,25 @@
3923 self.assertEqual(inst.db.get('_local/dmedia')['_rev'], '0-1')
3924
3925 def test_get_file(self):
3926+ src = TempDir()
3927+ (file, ch) = src.random_file()
3928+
3929 inst = self.klass(self.env)
3930- doc = {
3931- '_id': mov_hash,
3932- 'ext': 'mov',
3933- }
3934- inst.db.save(doc)
3935- self.assertIsNone(inst.get_file(mov_hash))
3936+ self.assertIsNone(inst.get_file(ch.id))
3937
3938 tmp1 = TempDir()
3939 tmp2 = TempDir()
3940- fs1 = FileStore(tmp1.path)
3941- fs2 = FileStore(tmp2.path)
3942- inst._filestores[tmp1.path] = fs1
3943- inst._filestores[tmp2.path] = fs2
3944- self.assertIsNone(inst.get_file(mov_hash))
3945+ fs1 = FileStore(tmp1.dir)
3946+ fs2 = FileStore(tmp2.dir)
3947+ inst._filestores[tmp1.dir] = fs1
3948+ inst._filestores[tmp2.dir] = fs2
3949+ self.assertIsNone(inst.get_file(ch.id))
3950
3951- src_fp = open(sample_mov, 'rb')
3952+ src_fp = open(file.name, 'rb')
3953 fs1.import_file(src_fp)
3954- self.assertIsNone(inst.get_file(mov_hash))
3955+ self.assertEqual(inst.get_file(ch.id), fs1.path(ch.id))
3956+ fs1.remove(ch.id)
3957
3958- src_fp = open(sample_mov, 'rb')
3959- fs2.import_file(src_fp, 'mov')
3960- self.assertEqual(inst.get_file(mov_hash), fs2.path(mov_hash, 'mov'))
3961+ src_fp = open(file.name, 'rb')
3962+ fs2.import_file(src_fp)
3963+ self.assertEqual(inst.get_file(ch.id), fs2.path(ch.id))
3964
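The reworked test_get_file() nails down what Core.get_file() must do: return the full path from whichever FileStore actually holds the file, or None. Something along these lines (a sketch of the expected behavior, not the actual core.py):

    from os import path

    def get_file(self, _id):
        # ask each registered FileStore; fs.path() only builds the
        # filename, so check that the file really exists on disk
        for fs in self._filestores.values():
            filename = fs.path(_id)
            if path.isfile(filename):
                return filename
        return None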
3965=== modified file 'dmedia/tests/test_extractor.py'
3966--- dmedia/tests/test_extractor.py 2011-08-29 02:00:26 +0000
3967+++ dmedia/tests/test_extractor.py 2011-09-22 11:43:29 +0000
3968@@ -23,231 +23,231 @@
3969 Unit tests for `dmedia.extractor` module.
3970 """
3971
3972-from unittest import TestCase
3973 import base64
3974 from os import path
3975-import Image
3976-from .helpers import sample_mov, sample_thm, TempDir
3977+
3978+from .base import TempDir, SampleFilesTestCase
3979+
3980 from dmedia import extractor
3981
3982 # Known EXIF data as returned by exiftool:
3983 sample_thm_exif = {
3984- u'AddOriginalDecisionData': u'Off',
3985- u'AEBAutoCancel': u'On',
3986- u'AEBBracketValue': 0,
3987- u'AEBSequence': u'0,-,+',
3988- u'AFAssistBeam': u'Emits',
3989- u'AFMicroAdjActive': u'No',
3990- u'AFMicroadjustment': u'Disable; 0; 0; 0; 0',
3991- u'AFMicroAdjValue': 0,
3992- u'AFOnAELockButtonSwitch': u'Disable',
3993- u'AFPointAreaExpansion': u'Disable',
3994- u'AFPointSelectionMethod': u'Normal',
3995- u'Aperture': 11.0,
3996- u'ApertureValue': 11.300000000000001,
3997- u'Artist': u'',
3998- u'AssignFuncButton': u'LCD brightness',
3999- u'AutoExposureBracketing': u'Off',
4000- u'AutoISO': 100,
4001- u'AutoLightingOptimizer': u'Disable',
4002- u'BaseISO': 100,
4003- u'BlackMaskBottomBorder': 0,
4004- u'BlackMaskLeftBorder': 0,
4005- u'BlackMaskRightBorder': 0,
4006- u'BlackMaskTopBorder': 0,
4007- u'BracketMode': u'Off',
4008- u'BracketShotNumber': 0,
4009- u'BracketValue': 0,
4010- u'BulbDuration': 0,
4011- u'CameraType': u'EOS High-end',
4012- u'CanonExposureMode': u'Manual',
4013- u'CanonFirmwareVersion': u'Firmware Version 2.0.7',
4014- u'CanonFlashMode': u'Off',
4015- u'CanonImageSize': u'1920x1080 Movie',
4016- u'CanonImageType': u'MVI:Canon EOS 5D Mark II',
4017- u'CanonModelID': u'EOS 5D Mark II',
4018- u'CircleOfConfusion': u'0.031 mm',
4019- u'ColorComponents': 3,
4020- u'ColorSpace': u'sRGB',
4021- u'ColorTemperature': 3600,
4022- u'ColorTone': u'Normal',
4023- u'ComponentsConfiguration': u'Y, Cb, Cr, -',
4024- u'ContinuousDrive': u'Movie',
4025- u'Contrast': -4,
4026- u'ControlMode': u'Camera Local Control',
4027- u'Copyright': u'',
4028- u'CreateDate': u'2010:10:19 20:43:14',
4029- u'CustomRendered': u'Normal',
4030- u'DateTimeOriginal': u'2010:10:19 20:43:14',
4031- u'DialDirectionTvAv': u'Normal',
4032- u'DigitalGain': 0,
4033- u'DigitalZoom': u'None',
4034- #u'Directory': u'dmedia/tests/data',
4035- u'DriveMode': u'Continuous Shooting',
4036- u'EasyMode': u'Manual',
4037- u'EncodingProcess': u'Baseline DCT, Huffman coding',
4038- #u'ExifByteOrder': u'Little-endian (Intel, II)',
4039- u'ExifImageHeight': 120,
4040- u'ExifImageWidth': 160,
4041- #u'ExifToolVersion': 8.1500000000000004,
4042- u'ExifVersion': u'0221',
4043- u'ExposureCompensation': 0,
4044- u'ExposureLevelIncrements': u'1/3 Stop',
4045- u'ExposureMode': u'Auto',
4046- u'ExposureProgram': u'Manual',
4047- u'ExposureTime': u'1/100',
4048- #u'FileModifyDate': u'2010:10:19 20:43:18-06:00',
4049- #u'FileName': u'MVI_5751.THM',
4050- #u'FilePermissions': u'rw-r--r--',
4051- #u'FileSize': u'27 kB',
4052- #u'FileType': u'JPEG',
4053- u'FlashActivity': 0,
4054- u'FlashBits': u'(none)',
4055- u'FlashExposureComp': 0, u'SequenceNumber': 0,
4056- u'FlashExposureLock': u'Off',
4057- u'FlashGuideNumber': 0,
4058- u'FlashpixVersion': u'0100',
4059- u'FlashSyncSpeedAv': u'Auto',
4060- u'Flash': u'Off, Did not fire',
4061- u'FNumber': 11.0,
4062- u'FocalLength35efl': u'138.0 mm (35 mm equivalent: 134.7 mm)',
4063- u'FocalLength': u'138.0 mm',
4064- u'FocalPlaneResolutionUnit': u'inches',
4065- u'FocalPlaneXResolution': 109.6641535,
4066- u'FocalPlaneYResolution': 125.26096029999999,
4067- u'FocalUnits': u'1/mm',
4068- u'FocusingScreen': u'Eg-D',
4069- u'FocusMode': u'Manual Focus (3)',
4070- u'FocusRange': u'Not Known',
4071- u'FOV': u'15.2 deg',
4072- u'GPSVersionID': u'2.2.0.0',
4073- u'HighISONoiseReduction': u'Standard',
4074- u'HighlightTonePriority': u'Disable',
4075- u'HyperfocalDistance': u'56.23 m',
4076- u'ImageHeight': 120,
4077- u'ImageSize': u'160x120',
4078- u'ImageWidth': 160,
4079- u'InternalSerialNumber': u'',
4080- u'InteropIndex': u'THM - DCF thumbnail file',
4081- u'InteropVersion': u'0100',
4082- u'ISO': 100,
4083- u'ISOExpansion': u'Off',
4084- u'ISOSpeedIncrements': u'1/3 Stop',
4085- u'Lens35efl': u'70.0 - 200.0 mm (35 mm equivalent: 68.3 - 195.2 mm)',
4086- u'LensAFStopButton': u'AF stop',
4087- u'LensDriveNoAF': u'Focus search on',
4088- u'LensID': u'Canon EF 70-200mm f/4L IS',
4089- u'LensModel': u'EF70-200mm f/4L IS USM',
4090- u'LensType': u'Canon EF 70-200mm f/4L IS',
4091- u'Lens': u'70.0 - 200.0 mm',
4092- u'LightValue': 13.6,
4093- u'LiveViewShooting': u'On',
4094- u'LongExposureNoiseReduction2': u'Off',
4095- u'LongExposureNoiseReduction': u'Off',
4096- u'LongFocal': u'200 mm',
4097- u'MacroMode': u'Normal',
4098- u'Make': u'Canon',
4099- u'ManualFlashOutput': u'n/a',
4100- u'MaxAperture': 4,
4101- u'MeasuredEV': 12.5,
4102- u'MeasuredEV2': 13,
4103- u'MeteringMode': u'Center-weighted average',
4104- #u'MIMEType': u'image/jpeg',
4105- u'MinAperture': 32,
4106- u'MirrorLockup': u'Disable',
4107- u'Model': u'Canon EOS 5D Mark II',
4108- u'ModifyDate': u'2010:10:19 20:43:14',
4109- u'NDFilter': u'n/a',
4110- u'OpticalZoomCode': u'n/a',
4111- u'Orientation': u'Horizontal (normal)',
4112- u'OwnerName': u'',
4113- u'PictureStyle': u'User Def. 1',
4114- u'Quality': u'Unknown (-1)',
4115- u'RawJpgSize': u'Large',
4116- u'RecordMode': u'Video',
4117- u'RelatedImageHeight': 1080,
4118- u'RelatedImageWidth': 1920,
4119- u'ResolutionUnit': u'inches',
4120- u'SafetyShift': u'Disable',
4121- u'Saturation': u'Normal',
4122- u'ScaleFactor35efl': 1.0,
4123- u'SceneCaptureType': u'Standard',
4124- u'SelfTimer': u'Off',
4125- u'SensorBlueLevel': 0,
4126- u'SensorBottomBorder': 3799,
4127- u'SensorHeight': 3804,
4128- u'SensorLeftBorder': 168,
4129- u'SensorRedLevel': 0,
4130- u'SensorRightBorder': 5783,
4131- u'SensorTopBorder': 56,
4132- u'SensorWidth': 5792,
4133- u'SerialNumberFormat': u'Format 2',
4134- u'SerialNumber': u'0820500998',
4135- u'SetButtonWhenShooting': u'Normal (disabled)',
4136- u'Sharpness': 3,
4137- u'SharpnessFrequency': u'n/a',
4138- u'ShootingMode': u'Manual',
4139- u'ShortFocal': u'70 mm',
4140- u'ShutterButtonAFOnButton': u'Metering + AF start',
4141- u'ShutterSpeed': u'1/100',
4142- u'ShutterSpeedValue': u'1/99',
4143- u'SlowShutter': u'None',
4144- #u'SourceFile': u'dmedia/tests/data/MVI_5751.THM',
4145- u'SubSecCreateDate': u'2010:10:19 20:43:14.68',
4146- u'SubSecDateTimeOriginal': u'2010:10:19 20:43:14.68',
4147- u'SubSecModifyDate': u'2010:10:19 20:43:14.68',
4148- u'SubSecTime': 68,
4149- u'SubSecTimeDigitized': 68,
4150- u'SubSecTimeOriginal': 68,
4151- u'SuperimposedDisplay': u'On',
4152- u'TargetAperture': 11,
4153- u'TargetExposureTime': u'1/102',
4154- u'ThumbnailImageValidArea': u'0 159 15 104',
4155- u'ToneCurve': u'Standard',
4156- u'UserComment': u'',
4157- u'VRDOffset': 0,
4158- #u'Warning': u'Invalid CanonAFInfo2 data', Not present under Oneiric
4159- u'WBBracketMode': u'Off',
4160- u'WBBracketValueAB': 0,
4161- u'WBBracketValueGM': 0,
4162- u'WBShiftAB': 0,
4163- u'WBShiftGM': 0,
4164- u'WhiteBalanceBlue': 0,
4165- u'WhiteBalanceRed': 0,
4166- u'WhiteBalance': u'Daylight',
4167- u'XResolution': 72,
4168- u'YCbCrPositioning': u'Co-sited',
4169- u'YCbCrSubSampling': u'YCbCr4:2:2 (2 1)',
4170- u'YResolution': 72,
4171- u'ZoomSourceWidth': 0,
4172- u'ZoomTargetWidth': 0,
4173- u'BitsPerSample': 8,
4174+ 'AddOriginalDecisionData': 'Off',
4175+ 'AEBAutoCancel': 'On',
4176+ 'AEBBracketValue': 0,
4177+ 'AEBSequence': '0,-,+',
4178+ 'AFAssistBeam': 'Emits',
4179+ 'AFMicroAdjActive': 'No',
4180+ 'AFMicroadjustment': 'Disable; 0; 0; 0; 0',
4181+ 'AFMicroAdjValue': 0,
4182+ 'AFOnAELockButtonSwitch': 'Disable',
4183+ 'AFPointAreaExpansion': 'Disable',
4184+ 'AFPointSelectionMethod': 'Normal',
4185+ 'Aperture': 11.0,
4186+ 'ApertureValue': 11.300000000000001,
4187+ 'Artist': '',
4188+ 'AssignFuncButton': 'LCD brightness',
4189+ 'AutoExposureBracketing': 'Off',
4190+ 'AutoISO': 100,
4191+ 'AutoLightingOptimizer': 'Disable',
4192+ 'BaseISO': 100,
4193+ 'BlackMaskBottomBorder': 0,
4194+ 'BlackMaskLeftBorder': 0,
4195+ 'BlackMaskRightBorder': 0,
4196+ 'BlackMaskTopBorder': 0,
4197+ 'BracketMode': 'Off',
4198+ 'BracketShotNumber': 0,
4199+ 'BracketValue': 0,
4200+ 'BulbDuration': 0,
4201+ 'CameraType': 'EOS High-end',
4202+ 'CanonExposureMode': 'Manual',
4203+ 'CanonFirmwareVersion': 'Firmware Version 2.0.7',
4204+ 'CanonFlashMode': 'Off',
4205+ 'CanonImageSize': '1920x1080 Movie',
4206+ 'CanonImageType': 'MVI:Canon EOS 5D Mark II',
4207+ 'CanonModelID': 'EOS 5D Mark II',
4208+ 'CircleOfConfusion': '0.031 mm',
4209+ 'ColorComponents': 3,
4210+ 'ColorSpace': 'sRGB',
4211+ 'ColorTemperature': 3600,
4212+ 'ColorTone': 'Normal',
4213+ 'ComponentsConfiguration': 'Y, Cb, Cr, -',
4214+ 'ContinuousDrive': 'Movie',
4215+ 'Contrast': -4,
4216+ 'ControlMode': 'Camera Local Control',
4217+ 'Copyright': '',
4218+ 'CreateDate': '2010:10:19 20:43:14',
4219+ 'CustomRendered': 'Normal',
4220+ 'DateTimeOriginal': '2010:10:19 20:43:14',
4221+ 'DialDirectionTvAv': 'Normal',
4222+ 'DigitalGain': 0,
4223+ 'DigitalZoom': 'None',
4224+ #'Directory': 'dmedia/tests/data',
4225+ 'DriveMode': 'Continuous Shooting',
4226+ 'EasyMode': 'Manual',
4227+ 'EncodingProcess': 'Baseline DCT, Huffman coding',
4228+ #'ExifByteOrder': 'Little-endian (Intel, II)',
4229+ 'ExifImageHeight': 120,
4230+ 'ExifImageWidth': 160,
4231+ #'ExifToolVersion': 8.1500000000000004,
4232+ 'ExifVersion': '0221',
4233+ 'ExposureCompensation': 0,
4234+ 'ExposureLevelIncrements': '1/3 Stop',
4235+ 'ExposureMode': 'Auto',
4236+ 'ExposureProgram': 'Manual',
4237+ 'ExposureTime': '1/100',
4238+ #'FileModifyDate': '2010:10:19 20:43:18-06:00',
4239+ #'FileName': 'MVI_5751.THM',
4240+ #'FilePermissions': 'rw-r--r--',
4241+ #'FileSize': '27 kB',
4242+ #'FileType': 'JPEG',
4243+ 'FlashActivity': 0,
4244+ 'FlashBits': '(none)',
4245+ 'FlashExposureComp': 0, 'SequenceNumber': 0,
4246+ 'FlashExposureLock': 'Off',
4247+ 'FlashGuideNumber': 0,
4248+ 'FlashpixVersion': '0100',
4249+ 'FlashSyncSpeedAv': 'Auto',
4250+ 'Flash': 'Off, Did not fire',
4251+ 'FNumber': 11.0,
4252+ 'FocalLength35efl': '138.0 mm (35 mm equivalent: 134.7 mm)',
4253+ 'FocalLength': '138.0 mm',
4254+ 'FocalPlaneResolutionUnit': 'inches',
4255+ 'FocalPlaneXResolution': 109.6641535,
4256+ 'FocalPlaneYResolution': 125.26096029999999,
4257+ 'FocalUnits': '1/mm',
4258+ 'FocusingScreen': 'Eg-D',
4259+ 'FocusMode': 'Manual Focus (3)',
4260+ 'FocusRange': 'Not Known',
4261+ 'FOV': '15.2 deg',
4262+ 'GPSVersionID': '2.2.0.0',
4263+ 'HighISONoiseReduction': 'Standard',
4264+ 'HighlightTonePriority': 'Disable',
4265+ 'HyperfocalDistance': '56.23 m',
4266+ 'ImageHeight': 120,
4267+ 'ImageSize': '160x120',
4268+ 'ImageWidth': 160,
4269+ 'InternalSerialNumber': '',
4270+ 'InteropIndex': 'THM - DCF thumbnail file',
4271+ 'InteropVersion': '0100',
4272+ 'ISO': 100,
4273+ 'ISOExpansion': 'Off',
4274+ 'ISOSpeedIncrements': '1/3 Stop',
4275+ 'Lens35efl': '70.0 - 200.0 mm (35 mm equivalent: 68.3 - 195.2 mm)',
4276+ 'LensAFStopButton': 'AF stop',
4277+ 'LensDriveNoAF': 'Focus search on',
4278+ 'LensID': 'Canon EF 70-200mm f/4L IS',
4279+ 'LensModel': 'EF70-200mm f/4L IS USM',
4280+ 'LensType': 'Canon EF 70-200mm f/4L IS',
4281+ 'Lens': '70.0 - 200.0 mm',
4282+ 'LightValue': 13.6,
4283+ 'LiveViewShooting': 'On',
4284+ 'LongExposureNoiseReduction2': 'Off',
4285+ 'LongExposureNoiseReduction': 'Off',
4286+ 'LongFocal': '200 mm',
4287+ 'MacroMode': 'Normal',
4288+ 'Make': 'Canon',
4289+ 'ManualFlashOutput': 'n/a',
4290+ 'MaxAperture': 4,
4291+ 'MeasuredEV': 12.5,
4292+ 'MeasuredEV2': 13,
4293+ 'MeteringMode': 'Center-weighted average',
4294+ #'MIMEType': 'image/jpeg',
4295+ 'MinAperture': 32,
4296+ 'MirrorLockup': 'Disable',
4297+ 'Model': 'Canon EOS 5D Mark II',
4298+ 'ModifyDate': '2010:10:19 20:43:14',
4299+ 'NDFilter': 'n/a',
4300+ 'OpticalZoomCode': 'n/a',
4301+ 'Orientation': 'Horizontal (normal)',
4302+ 'OwnerName': '',
4303+ 'PictureStyle': 'User Def. 1',
4304+ 'Quality': 'Unknown (-1)',
4305+ 'RawJpgSize': 'Large',
4306+ 'RecordMode': 'Video',
4307+ 'RelatedImageHeight': 1080,
4308+ 'RelatedImageWidth': 1920,
4309+ 'ResolutionUnit': 'inches',
4310+ 'SafetyShift': 'Disable',
4311+ 'Saturation': 'Normal',
4312+ 'ScaleFactor35efl': 1.0,
4313+ 'SceneCaptureType': 'Standard',
4314+ 'SelfTimer': 'Off',
4315+ 'SensorBlueLevel': 0,
4316+ 'SensorBottomBorder': 3799,
4317+ 'SensorHeight': 3804,
4318+ 'SensorLeftBorder': 168,
4319+ 'SensorRedLevel': 0,
4320+ 'SensorRightBorder': 5783,
4321+ 'SensorTopBorder': 56,
4322+ 'SensorWidth': 5792,
4323+ 'SerialNumberFormat': 'Format 2',
4324+ 'SerialNumber': '0820500998',
4325+ 'SetButtonWhenShooting': 'Normal (disabled)',
4326+ 'Sharpness': 3,
4327+ 'SharpnessFrequency': 'n/a',
4328+ 'ShootingMode': 'Manual',
4329+ 'ShortFocal': '70 mm',
4330+ 'ShutterButtonAFOnButton': 'Metering + AF start',
4331+ 'ShutterSpeed': '1/100',
4332+ 'ShutterSpeedValue': '1/99',
4333+ 'SlowShutter': 'None',
4334+ #'SourceFile': 'dmedia/tests/data/MVI_5751.THM',
4335+ 'SubSecCreateDate': '2010:10:19 20:43:14.68',
4336+ 'SubSecDateTimeOriginal': '2010:10:19 20:43:14.68',
4337+ 'SubSecModifyDate': '2010:10:19 20:43:14.68',
4338+ 'SubSecTime': 68,
4339+ 'SubSecTimeDigitized': 68,
4340+ 'SubSecTimeOriginal': 68,
4341+ 'SuperimposedDisplay': 'On',
4342+ 'TargetAperture': 11,
4343+ 'TargetExposureTime': '1/102',
4344+ 'ThumbnailImageValidArea': '0 159 15 104',
4345+ 'ToneCurve': 'Standard',
4346+ 'UserComment': '',
4347+ 'VRDOffset': 0,
4348+ #'Warning': 'Invalid CanonAFInfo2 data', Not present under Oneiric
4349+ 'WBBracketMode': 'Off',
4350+ 'WBBracketValueAB': 0,
4351+ 'WBBracketValueGM': 0,
4352+ 'WBShiftAB': 0,
4353+ 'WBShiftGM': 0,
4354+ 'WhiteBalanceBlue': 0,
4355+ 'WhiteBalanceRed': 0,
4356+ 'WhiteBalance': 'Daylight',
4357+ 'XResolution': 72,
4358+ 'YCbCrPositioning': 'Co-sited',
4359+ 'YCbCrSubSampling': 'YCbCr4:2:2 (2 1)',
4360+ 'YResolution': 72,
4361+ 'ZoomSourceWidth': 0,
4362+ 'ZoomTargetWidth': 0,
4363+ 'BitsPerSample': 8,
4364 }
4365
4366 # These values are new running on Oneiric
4367 sample_thm_exif2 = {
4368- u'CropLeftMargin': 24,
4369- u'CropRightMargin': 24,
4370- u'CropTopMargin': 16,
4371- u'CropBottomMargin': 16,
4372-
4373- u'CroppedImageWidth': 2784,
4374- u'CroppedImageHeight': 1856,
4375-
4376- u'VideoCodec': u'avc1',
4377-
4378- u'AudioBitrate': u'1.54 Mbps',
4379- u'CustomPictureStyleFileName': u'superflat01',
4380- u'Duration': u'3.00 s',
4381- u'FrameRate': 29.97,
4382-
4383- u'AudioChannels': 2,
4384- u'AudioSampleRate': 48000,
4385- u'CameraTemperature': u'30 C',
4386-
4387- u'AspectRatio': u'3:2',
4388-
4389- u'FrameCount': 107,
4390+ 'CropLeftMargin': 24,
4391+ 'CropRightMargin': 24,
4392+ 'CropTopMargin': 16,
4393+ 'CropBottomMargin': 16,
4394+
4395+ 'CroppedImageWidth': 2784,
4396+ 'CroppedImageHeight': 1856,
4397+
4398+ 'VideoCodec': 'avc1',
4399+
4400+ 'AudioBitrate': '1.54 Mbps',
4401+ 'CustomPictureStyleFileName': 'superflat01',
4402+ 'Duration': '3.00 s',
4403+ 'FrameRate': 29.97,
4404+
4405+ 'AudioChannels': 2,
4406+ 'AudioSampleRate': 48000,
4407+ 'CameraTemperature': '30 C',
4408+
4409+ 'AspectRatio': '3:2',
4410+
4411+ 'FrameCount': 107,
4412 }
4413
4414 sample_thm_exif.update(sample_thm_exif2)
4415@@ -268,21 +268,21 @@
4416 }
4417
4418
4419-class test_functions(TestCase):
4420+class TestFunctions(SampleFilesTestCase):
4421
4422 def test_file_2_base64(self):
4423 f = extractor.file_2_base64
4424 tmp = TempDir()
4425- src = tmp.write('Hello naughty nurse!', 'sample.txt')
4426+ src = tmp.write(b'Hello naughty nurse!', 'sample.txt')
4427 self.assertEqual(
4428 base64.b64decode(f(src)),
4429- 'Hello naughty nurse!'
4430+ b'Hello naughty nurse!'
4431 )
4432
4433
4434 def test_extract_exif(self):
4435 f = extractor.extract_exif
4436- exif = f(sample_thm)
4437+ exif = f(self.thm)
4438 self.assertEqual(set(sample_thm_exif), set(exif))
4439 for key in sample_thm_exif:
4440 v1 = sample_thm_exif[key]
4441@@ -292,20 +292,16 @@
4442
4443 # Test that error is returned for invalid file:
4444 tmp = TempDir()
4445- data = 'Foo Bar\n' * 1000
4446+ data = b'Foo Bar\n' * 1000
4447 jpg = tmp.write(data, 'sample.jpg')
4448 self.assertEqual(
4449 f(jpg),
4450- {u'Error': u'File format error'}
4451+ {'Error': 'File format error'}
4452 )
4453
4454 # Test with non-existent file:
4455 nope = tmp.join('nope.jpg')
4456- self.assertEqual(
4457- f(nope),
4458- {u'Error': u'ValueError: No JSON object could be decoded'}
4459- )
4460-
4461+ self.assertEqual(f(nope), {})
4462
4463 def test_parse_subsec_datetime(self):
4464 f = extractor.parse_subsec_datetime
4465@@ -365,11 +361,11 @@
4466 tmp = TempDir()
4467
4468 # Test with sample_mov from 5D Mark II:
4469- info = f(sample_mov)
4470+ info = f(self.mov)
4471 self.assertEqual(sample_mov_info, info)
4472
4473 # Test invalid file:
4474- invalid = tmp.write('Wont work!', 'invalid.mov')
4475+ invalid = tmp.write(b'Wont work!', 'invalid.mov')
4476 self.assertEqual(
4477 f(invalid),
4478 {
4479@@ -394,18 +390,14 @@
4480 tmp = TempDir()
4481
4482 # Test with sample_mov from 5D Mark II:
4483- d = f(sample_mov)
4484+ d = f(self.mov)
4485 self.assertTrue(isinstance(d, dict))
4486 self.assertEqual(sorted(d), ['content_type', 'data'])
4487 self.assertEqual(d['content_type'], 'image/jpeg')
4488 data = base64.b64decode(d['data'])
4489- jpg = tmp.write(data, 'thumbnail.jpg')
4490- img = Image.open(jpg)
4491- self.assertEqual(img.size, (192, 108))
4492- self.assertEqual(img.format, 'JPEG')
4493
4494 # Test invalid file:
4495- invalid = tmp.write('Wont work!', 'invalid.mov')
4496+ invalid = tmp.write(b'Wont work!', 'invalid.mov')
4497 self.assertEqual(f(invalid), None)
4498
4499 # Test with non-existent file:
4500@@ -418,7 +410,7 @@
4501 tmp = TempDir()
4502
4503 doc = dict(ext='mov')
4504- f(sample_mov, doc)
4505+ f(self.mov, doc)
4506
4507 # Check canon.thm attachment
4508 att = doc.pop('_attachments')
4509@@ -427,7 +419,7 @@
4510 self.assertEqual(att['canon.thm']['content_type'], 'image/jpeg')
4511 self.assertEqual(
4512 base64.b64decode(att['canon.thm']['data']),
4513- open(sample_thm, 'r').read()
4514+ open(self.thm, 'rb').read()
4515 )
4516
4517 # Check thumbnail
4518@@ -436,10 +428,6 @@
4519 self.assertEqual(sorted(thm), ['content_type', 'data'])
4520 self.assertEqual(thm['content_type'], 'image/jpeg')
4521 data = base64.b64decode(thm['data'])
4522- jpg = tmp.write(data, 'thumbnail.jpg')
4523- img = Image.open(jpg)
4524- self.assertEqual(img.size, (192, 108))
4525- self.assertEqual(img.format, 'JPEG')
4526
4527 self.assertEqual(
4528 doc,
4529@@ -456,32 +444,32 @@
4530 fps=30,
4531 channels='Stereo',
4532 iso=100,
4533- shutter=u'1/100',
4534+ shutter='1/100',
4535 aperture=11.0,
4536- lens=u'Canon EF 70-200mm f/4L IS',
4537- camera=u'Canon EOS 5D Mark II',
4538- camera_serial=u'0820500998',
4539- focal_length=u'138.0 mm',
4540+ lens='Canon EF 70-200mm f/4L IS',
4541+ camera='Canon EOS 5D Mark II',
4542+ camera_serial='0820500998',
4543+ focal_length='138.0 mm',
4544 ),
4545 )
4546 )
4547
4548 def test_merge_exif(self):
4549 f = extractor.merge_exif
4550- self.assertTrue(sample_thm.endswith('.THM'))
4551+ self.assertTrue(self.thm.endswith('.THM'))
4552 attachments = {}
4553 self.assertEqual(
4554- dict(f(sample_thm, attachments)),
4555+ dict(f(self.thm, attachments)),
4556 dict(
4557 width=160,
4558 height=120,
4559 iso=100,
4560- shutter=u'1/100',
4561+ shutter='1/100',
4562 aperture=11.0,
4563- lens=u'Canon EF 70-200mm f/4L IS',
4564- camera=u'Canon EOS 5D Mark II',
4565- camera_serial=u'0820500998',
4566- focal_length=u'138.0 mm',
4567+ lens='Canon EF 70-200mm f/4L IS',
4568+ camera='Canon EOS 5D Mark II',
4569+ camera_serial='0820500998',
4570+ focal_length='138.0 mm',
4571 mtime=1287520994 + 68 / 100.0,
4572 ),
4573 )
4574@@ -493,7 +481,7 @@
4575 tmp = TempDir()
4576
4577 att = {}
4578- merged = dict(f(sample_mov, att))
4579+ merged = dict(f(self.mov, att))
4580
4581 # Check canon.thm attachment
4582 self.assertEqual(set(att), set(['thumbnail', 'canon.thm']))
4583@@ -501,7 +489,7 @@
4584 self.assertEqual(att['canon.thm']['content_type'], 'image/jpeg')
4585 self.assertEqual(
4586 base64.b64decode(att['canon.thm']['data']),
4587- open(sample_thm, 'r').read()
4588+ open(self.thm, 'rb').read()
4589 )
4590
4591 # Check thumbnail
4592@@ -510,10 +498,6 @@
4593 self.assertEqual(sorted(thm), ['content_type', 'data'])
4594 self.assertEqual(thm['content_type'], 'image/jpeg')
4595 data = base64.b64decode(thm['data'])
4596- jpg = tmp.write(data, 'thumbnail.jpg')
4597- img = Image.open(jpg)
4598- self.assertEqual(img.size, (192, 108))
4599- self.assertEqual(img.format, 'JPEG')
4600
4601 self.assertEqual(
4602 merged,
4603@@ -527,19 +511,19 @@
4604 fps=30,
4605 channels='Stereo',
4606 iso=100,
4607- shutter=u'1/100',
4608+ shutter='1/100',
4609 aperture=11.0,
4610- lens=u'Canon EF 70-200mm f/4L IS',
4611- camera=u'Canon EOS 5D Mark II',
4612- camera_serial=u'0820500998',
4613- focal_length=u'138.0 mm',
4614+ lens='Canon EF 70-200mm f/4L IS',
4615+ camera='Canon EOS 5D Mark II',
4616+ camera_serial='0820500998',
4617+ focal_length='138.0 mm',
4618 mtime=1287520994 + 68 / 100.0,
4619 )
4620 )
4621
4622 # Test invalid file:
4623- invalid_mov = tmp.write('Wont work!', 'invalid.mov')
4624- invalid_thm = tmp.write('Wont work either!', 'invalid.thm')
4625+ invalid_mov = tmp.write(b'Wont work!', 'invalid.mov')
4626+ invalid_thm = tmp.write(b'Wont work either!', 'invalid.thm')
4627 att = {}
4628 merged = dict(f(invalid_mov, att))
4629 self.assertEqual(merged, {})
4630
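Most of the churn in this file is the Python 3 bytes/str split: TempDir.write() opens its target in 'wb', so every sample payload must now be bytes, and files read back for comparison must be opened in 'rb'. For example:

    tmp = TempDir()
    tmp.write(b'Foo Bar\n' * 1000, 'sample.jpg')      # bytes in
    data = open(tmp.join('sample.jpg'), 'rb').read()  # bytes out
    assert isinstance(data, bytes)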
4631=== modified file 'dmedia/tests/test_filestore.py'
4632--- dmedia/tests/test_filestore.py 2011-06-15 04:46:55 +0000
4633+++ dmedia/tests/test_filestore.py 2011-09-22 11:43:29 +0000
4634@@ -21,1323 +21,35 @@
4635 # with `dmedia`. If not, see <http://www.gnu.org/licenses/>.
4636
4637 """
4638-Unit tests for `dmedia.filestore` module.
4639+Unit tests for external filestore, dmedia style.
4640 """
4641
4642-import os
4643-from os import path
4644-import stat
4645-from hashlib import sha1
4646-from base64 import b32encode, b32decode
4647-import shutil
4648-import json
4649-from unittest import TestCase
4650-from .helpers import TempDir, TempHome, raises
4651-from .helpers import sample_mov, sample_thm
4652-from .helpers import mov_hash, mov_leaves, mov_qid
4653-from .helpers import thm_hash, thm_leaves, thm_qid
4654-from dmedia.errors import AmbiguousPath, FileStoreTraversal
4655-from dmedia.errors import DuplicateFile, IntegrityError
4656-from dmedia.filestore import HashList
4657-from dmedia import filestore, constants
4658-from dmedia.constants import TYPE_ERROR, EXT_PAT, LEAF_SIZE
4659-
4660-
4661-class test_functions(TestCase):
4662- def test_safe_path(self):
4663- f = filestore.safe_path
4664-
4665- # Test with relative path:
4666- e = raises(AmbiguousPath, f, 'foo/bar')
4667- self.assertEqual(e.pathname, 'foo/bar')
4668- self.assertEqual(e.abspath, path.abspath('foo/bar'))
4669-
4670- # Test with path traversal:
4671- e = raises(AmbiguousPath, f, '/foo/bar/../../root')
4672- self.assertEqual(e.pathname, '/foo/bar/../../root')
4673- self.assertEqual(e.abspath, '/root')
4674-
4675- # Test with normalized absolute path:
4676- self.assertEqual(f('/home/jderose/.dmedia'), '/home/jderose/.dmedia')
4677-
4678- def test_safe_open(self):
4679- f = filestore.safe_open
4680- tmp = TempDir()
4681- filename = tmp.touch('example.mov')
4682-
4683- # Test that AmbiguousPath is raised:
4684- e = raises(AmbiguousPath, f, 'foo/bar', 'rb')
4685- self.assertEqual(e.pathname, 'foo/bar')
4686- self.assertEqual(e.abspath, path.abspath('foo/bar'))
4687-
4688- e = raises(AmbiguousPath, f, '/foo/bar/../../root', 'rb')
4689- self.assertEqual(e.pathname, '/foo/bar/../../root')
4690- self.assertEqual(e.abspath, '/root')
4691-
4692- # Test with absolute normalized path:
4693- fp = f(filename, 'rb')
4694- self.assertTrue(isinstance(fp, file))
4695- self.assertEqual(fp.name, filename)
4696- self.assertEqual(fp.mode, 'rb')
4697-
4698- def test_safe_ext(self):
4699- f = filestore.safe_ext
4700-
4701- # Test with wrong type
4702- e = raises(TypeError, f, 42)
4703- self.assertEqual(
4704- str(e),
4705- TYPE_ERROR % ('ext', basestring, int, 42)
4706- )
4707-
4708- # Test with invalid case:
4709- bad = 'ogV'
4710- e = raises(ValueError, f, bad)
4711- self.assertEqual(
4712- str(e),
4713- 'ext %r does not match pattern %r' % (bad, EXT_PAT)
4714- )
4715-
4716- # Test with invalid charaters:
4717- bad = '$home'
4718- e = raises(ValueError, f, bad)
4719- self.assertEqual(
4720- str(e),
4721- 'ext %r does not match pattern %r' % (bad, EXT_PAT)
4722- )
4723-
4724- # Test with path traversal:
4725- bad = '/../../../.ssh/id_pub'
4726- e = raises(ValueError, f, bad)
4727- self.assertEqual(
4728- str(e),
4729- 'ext %r does not match pattern %r' % (bad, EXT_PAT)
4730- )
4731-
4732- # Test with a good ext:
4733- good = 'wav'
4734- assert f(good) is good
4735- good = 'cr2'
4736- assert f(good) is good
4737- good = 'tar.gz'
4738- assert f(good) is good
4739-
4740- def test_safe_b32(self):
4741- f = filestore.safe_b32
4742-
4743- # Test with wrong type
4744- e = raises(TypeError, f, 42)
4745- self.assertEqual(
4746- str(e),
4747- TYPE_ERROR % ('b32', basestring, int, 42)
4748- )
4749-
4750- # Test with invalid base32 encoding:
4751- bad = 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7N'
4752- e = raises(ValueError, f, bad)
4753- self.assertEqual(
4754- str(e),
4755- 'b32: cannot b32decode %r: Incorrect padding' % bad
4756- )
4757-
4758- # Test with wrong length:
4759- bad = 'NWBNVXVK5DQGIOW7MYR4K3KA'
4760- e = raises(ValueError, f, bad)
4761- self.assertEqual(
4762- str(e),
4763- 'len(b32) must be 32; got 24: %r' % bad
4764- )
4765-
4766- # Test with a good chash:
4767- good = 'NWBNVXVK5DQGIOW7MYR4K3KA5K22W7NW'
4768- assert f(good) is good
4769-
4770- def test_tophash(self):
4771- f = filestore.tophash
4772- h = f(31415)
4773- self.assertEqual(
4774- h.digest(),
4775- sha1(b'dmedia/tophash 31415').digest()
4776- )
4777- l = ''.join(mov_leaves)
4778- h.update(l)
4779- self.assertEqual(
4780- h.digest(),
4781- sha1(b'dmedia/tophash 31415' + l).digest()
4782- )
4783-
4784- def test_leafhash(self):
4785- f = filestore.leafhash
4786- l = ''.join(mov_leaves)
4787-
4788- h = f(1079991, 0)
4789- self.assertEqual(
4790- h.digest(),
4791- sha1(b'dmedia/leafhash 1079991 0').digest()
4792- )
4793- h.update(l)
4794- self.assertEqual(
4795- h.digest(),
4796- sha1(b'dmedia/leafhash 1079991 0' + l).digest()
4797- )
4798-
4799- h = f(1079991, 1)
4800- self.assertEqual(
4801- h.digest(),
4802- sha1(b'dmedia/leafhash 1079991 1').digest()
4803- )
4804- h.update(l)
4805- self.assertEqual(
4806- h.digest(),
4807- sha1(b'dmedia/leafhash 1079991 1' + l).digest()
4808- )
4809-
4810- def test_pack_leaves(self):
4811- f = filestore.pack_leaves
4812-
4813- a = 'a' * 20
4814- b = 'b' * 20
4815- c = 'c' * 20
4816- d = 'd' * 20
4817- self.assertEqual(f([a, b, c]), a + b + c)
4818- self.assertEqual(f([a, b, c, d]), a + b + c + d)
4819-
4820- e = raises(ValueError, f, [a, b, c], digest_bytes=25)
4821- self.assertEqual(
4822- str(e),
4823- 'digest_bytes=25, but len(leaves[0]) is 20'
4824- )
4825- e = raises(ValueError, f, [a, 'b' * 15, c])
4826- self.assertEqual(
4827- str(e),
4828- 'digest_bytes=20, but len(leaves[1]) is 15'
4829- )
4830-
4831- def test_unpack_leaves(self):
4832- f = filestore.unpack_leaves
4833-
4834- a = 'a' * 20
4835- b = 'b' * 20
4836- c = 'c' * 20
4837- d = 'd' * 20
4838- data = a + b + c + d
4839- self.assertEqual(f(data), [a, b, c, d])
4840-
4841- a = 'a' * 32
4842- b = 'b' * 32
4843- c = 'c' * 32
4844- d = 'd' * 32
4845- e = 'e' * 32
4846- data = a + b + c + d + e
4847- self.assertEqual(f(data, digest_bytes=32), [a, b, c, d, e])
4848-
4849- e = raises(ValueError, f, 'a' * 201)
4850- self.assertEqual(
4851- str(e),
4852- 'len(data)=201, not multiple of digest_bytes=20'
4853- )
4854- e = raises(ValueError, f, 'a' * 200, digest_bytes=16)
4855- self.assertEqual(
4856- str(e),
4857- 'len(data)=200, not multiple of digest_bytes=16'
4858- )
4859-
4860- def test_quick_id(self):
4861- f = filestore.quick_id
4862-
4863- # Test with fp of wrong type
4864- e = raises(TypeError, f, 'hello')
4865- self.assertEqual(
4866- str(e),
4867- TYPE_ERROR % ('fp', file, str, 'hello')
4868- )
4869-
4870- # Test with fp opened in wrong mode
4871- fp = open(sample_mov, 'r')
4872- e = raises(ValueError, f, fp)
4873- self.assertEqual(
4874- str(e),
4875- "fp: must be opened in mode 'rb'; got 'r'"
4876- )
4877-
4878- # Test with some known files/values:
4879- fp = open(sample_mov, 'rb')
4880- self.assertEqual(f(fp), 'GJ4AQP3BK3DMTXYOLKDK6CW4QIJJGVMN')
4881- self.assertFalse(fp.closed) # Should not close file
4882-
4883- fp = open(sample_thm, 'rb')
4884- self.assertEqual(f(fp), 'EYCDXXCNDB6OIIX5DN74J7KEXLNCQD5M')
4885- self.assertFalse(fp.closed) # Should not close file
4886-
4887- # Make user seek(0) is being called:
4888- fp = open(sample_mov, 'rb')
4889- fp.seek(1024)
4890- self.assertEqual(f(fp), 'GJ4AQP3BK3DMTXYOLKDK6CW4QIJJGVMN')
4891- self.assertFalse(fp.closed) # Should not close file
4892-
4893- def test_fallocate(self):
4894- f = filestore.fallocate
4895- tmp = TempDir()
4896- filename = tmp.join('example.mov')
4897-
4898- # Test when size is wrong type:
4899- e = raises(TypeError, f, '2311', filename)
4900- self.assertEqual(
4901- str(e),
4902- TYPE_ERROR % ('size', (int, long), str, '2311')
4903- )
4904-
4905- # Test when size <= 0
4906- e = raises(ValueError, f, 0, filename)
4907- self.assertEqual(str(e), 'size must be >0; got 0')
4908- e = raises(ValueError, f, -2311, filename)
4909- self.assertEqual(str(e), 'size must be >0; got -2311')
4910-
4911- # Test with relative path:
4912- e = raises(AmbiguousPath, f, 2311, 'foo/bar')
4913- self.assertEqual(e.pathname, 'foo/bar')
4914- self.assertEqual(e.abspath, path.abspath('foo/bar'))
4915-
4916- # Test with path traversal:
4917- e = raises(AmbiguousPath, f, 2311, '/foo/bar/../../root')
4918- self.assertEqual(e.pathname, '/foo/bar/../../root')
4919- self.assertEqual(e.abspath, '/root')
4920-
4921- # Test with correct args:
4922- self.assertFalse(path.exists(filename))
4923- ret = f(2311, filename)
4924- self.assertTrue(ret in [None, True, False])
4925-
4926- if ret is None:
4927- self.assertFalse(path.exists(filename))
4928-
4929- if ret is True:
4930- self.assertTrue(path.exists(filename))
4931- self.assertEqual(path.getsize(filename), 2311)
4932-
4933- if ret is False:
4934- self.assertTrue(path.exists(filename))
4935- self.assertEqual(path.getsize(filename), 0)
4936-
4937-
4938-class test_HashList(TestCase):
4939- klass = filestore.HashList
4940-
4941- def test_init(self):
4942- tmp = TempDir()
4943- src_fp = open(sample_mov, 'rb')
4944- dst_fp = open(tmp.join('test.mov'), 'wb')
4945-
4946- # Test with src_fp of wrong type
4947- e = raises(TypeError, self.klass, 'hello', dst_fp)
4948- self.assertEqual(
4949- str(e),
4950- TYPE_ERROR % ('src_fp', file, str, 'hello')
4951- )
4952-
4953- # Test with src_fp opened in wrong mode
4954- e = raises(ValueError, self.klass, open(sample_mov, 'r'), dst_fp)
4955- self.assertEqual(
4956- str(e),
4957- "src_fp: mode must be 'rb'; got 'r'"
4958- )
4959-
4960- # Test with dst_fp of wrong type
4961- e = raises(TypeError, self.klass, src_fp, 17)
4962- self.assertEqual(
4963- str(e),
4964- TYPE_ERROR % ('dst_fp', file, int, 17)
4965- )
4966-
4967- # Test with dst_fp opened in wrong mode
4968- e = raises(ValueError, self.klass, src_fp,
4969- open(tmp.join('wrong.mov'), 'w')
4970- )
4971- self.assertEqual(
4972- str(e),
4973- "dst_fp: mode must be 'wb' or 'r+b'; got 'w'"
4974- )
4975-
4976- # Test with correct values
4977- inst = self.klass(src_fp)
4978- self.assertTrue(inst.src_fp is src_fp)
4979- self.assertEqual(inst.file_size, os.fstat(src_fp.fileno()).st_size)
4980- self.assertEqual(inst.leaves, [])
4981- self.assertTrue(inst.dst_fp is None)
4982- self.assertEqual(inst.leaf_size, constants.LEAF_SIZE)
4983-
4984- inst = self.klass(src_fp, dst_fp)
4985- self.assertTrue(inst.src_fp is src_fp)
4986- self.assertTrue(inst.dst_fp is dst_fp)
4987- self.assertEqual(inst.leaf_size, constants.LEAF_SIZE)
4988-
4989- inst = self.klass(src_fp, dst_fp, 2 * constants.LEAF_SIZE)
4990- self.assertTrue(inst.src_fp is src_fp)
4991- self.assertTrue(inst.dst_fp is dst_fp)
4992- self.assertEqual(inst.leaf_size, 2 * constants.LEAF_SIZE)
4993-
4994- def test_update(self):
4995- tmp = TempDir()
4996-
4997- class Example(self.klass):
4998- def __init__(self, dst_fp=None):
4999- self.dst_fp = dst_fp
5000-
The diff has been truncated for viewing.
