Sextant

Merge lp:~ben-hutchings/ensoft-sextant/csv-upload into lp:ensoft-sextant

csv-upload
Merge into whiteline

Proposed by Ben Hutchings on 2014-10-23

Status:

Merged

Approved by:

Robert on 2014-10-23

Approved revision:

Merged at revision:

Proposed branch:

lp:~ben-hutchings/ensoft-sextant/csv-upload

Merge into:

lp:ensoft-sextant

Diff against target:

3573 lines (+1945/-1015)

16 files modified

src/sextant/__main__.py (+54/-232)
src/sextant/csvwriter.py (+152/-0)
src/sextant/db_api.py (+587/-302)
src/sextant/export.py (+3/-3)
src/sextant/objdump_parser.py (+302/-262)
src/sextant/query.py (+29/-31)
src/sextant/sshmanager.py (+278/-0)
src/sextant/test_all.sh (+4/-0)
src/sextant/test_csvwriter.py (+89/-0)
src/sextant/test_db_api.py (+68/-54)
src/sextant/test_parser.py (+85/-0)
src/sextant/test_resources/parser_test.c (+57/-0)
src/sextant/test_resources/parser_test.dump (+44/-0)
src/sextant/test_sshmanager.py (+72/-0)
src/sextant/update_db.py (+96/-62)
src/sextant/web/server.py (+25/-69)

To merge this branch:

bzr merge lp:~ben-hutchings/ensoft-sextant/csv-upload

High

Fix Committed

Link a bug report

Reviewer	Review Type	Date Requested	Status
Robert		2014-10-23	Approve on 2014-10-23
Review via email: mp+239356@code.launchpad.net

Commit message

Programs are now uploaded by first parsing them into csv files, which are then uploaded to the database. This is _significantly_ faster for large programs.

Furthermore, the structure of the program nodes in the database has been changed: whereas they were previously unlabelled nodes with type 'program', they are now associated with the 'program' label (the database partitions on label, so this labelling keeps programs distinct from the functions). All queries in sextant have been updated to reflect this.

New module sshmanager handles the ssh connection to the database server.
New module csvwriter deals with the nuts and bolts of the csv files.

Description of the change

Programs are now uploaded by first parsing them into csv files, which are then uploaded to the database. This is _significantly_ faster for large programs.

New module sshmanager handles the ssh connection to the database server.
New module csvwriter deals with the nuts and bolts of the csv files.

Revision history for this message

Robert (rjwills) on 2014-10-23:

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Ben Hutchings

Ensoft Patch Lander

Patrick Stevens

 === modified file 'src/sextant/__main__.py'
 --- src/sextant/__main__.py	2014-10-03 13:00:52 +0000
 +++ src/sextant/__main__.py	2014-10-23 12:33:12 +0000
@@ -9,7 +9,6 @@
  import io
  import sys
--import random
  import socket
  import logging
  import logging.config
@@ -28,10 +27,12 @@
  from . import db_api
  from . import update_db
  from . import environment
++from . import sshmanager
  config = environment.load_config()
++
  def _displayable_url(args):
      """
      Return the URL specified by the user for Sextant to look at.
@@ -56,7 +57,7 @@
  # Beginning of functions which handle the actual invocation of Sextant
--def _start_web(args):
++def _start_web(connection, args):
      # Don't import at top level - makes twisted dependency semi-optional,
      # allowing non-web functionality to work with Python 3.
      if sys.version_info[0] == 2:
@@ -68,12 +69,12 @@
      logging.info("Serving site on port {}".format(args.port))
      # server is .web.server, imported a couple of lines ago
--    server.serve_site(input_database_url=args.remote_neo4j, port=args.port)
--
--
--def _audit(args):
++    server.serve_site(connection, args.port)
++
++
++def _audit(connection, args):
      try:
--        audited = query.audit(args.remote_neo4j)
++        audited = query.audit(connection)
      except requests.exceptions.ConnectionError as e:
          msg = 'Connection error to server {url}: {exception}'
          logging.error(msg.format(url=_displayable_url(args), exception=e))
@@ -87,8 +88,8 @@
          titles = ("Name", "#Func", "Uploader", "User-ID", "Upload Date")
          colminlens = (len(entry) for entry in titles)
          # maximum lengths to avoid one entry from throwing the whole table
--        # date format is <YYYY:MM:DD HH:MM:SS.UUUUUU> = 26 characters
--        COLMAXLENS = (25, 5, 25, 10, 26)
++        # date format is <YYYY-MM-DD HH:MM:SS> = 19 characters
++        COLMAXLENS = (25, 6, 25, 10, 19)
          # make a table of the strings of each data entry we will display
          text = [map(str, (p.program_name, p.number_of_funcs,
@@ -120,7 +121,7 @@
          print('\n'.join(st.format(*pentry) for pentry in text))
--def _add_program(args):
++def _add_program(connection, args):
      try:
          alternative_name = args.name_in_db[0]
      except TypeError:
@@ -131,12 +132,11 @@
      # unsupplied
      try:
--        update_db.upload_program(user_name=getpass.getuser(),
--                                 file_path=args.input_file,
--                                 db_url=args.remote_neo4j,
--                                 alternative_name=alternative_name,
--                                 not_object_file=not_object_file,
--                                 display_url=_displayable_url(args))
++        update_db.upload_program(connection,
++                                 getpass.getuser(),
++                                 args.input_file,
++                                 alternative_name,
++                                 not_object_file)
      except requests.exceptions.ConnectionError as e:
          msg = 'Connection error to server {}: {}'
          logging.error(msg.format(_displayable_url(args), e))
@@ -147,41 +147,41 @@
          logging.error('Input file {} was not found.'.format(args.input_file[0]))
          logging.error(e)
          logging.debug(e, exc_info=True)
--    except ValueError as e:
++    except (ValueError, sshmanager.SSHConnectionError) as e:
          logging.error(e)
--def _delete_program(namespace):
--    update_db.delete_program(namespace.program_name,
--                             namespace.remote_neo4j)
--
--
--def _make_query(namespace):
++def _delete_program(connection, args):
++    update_db.delete_program(connection, args.program_name)
++
++
++def _make_query(connection, args):
      arg1 = None
      arg2 = None
      try:
--        arg1 = namespace.funcs[0]
--        arg2 = namespace.funcs[1]
++        arg1 = args.funcs[0]
++        arg2 = args.funcs[1]
      except TypeError:
          pass
      except IndexError:
          pass
      try:
--        program_name = namespace.program[0]
++        program_name = args.program[0]
      except TypeError:
          program_name = None
      try:
--        suppress_common = namespace.suppress_common[0]
++        suppress_common = args.suppress_common[0]
      except TypeError:
          suppress_common = False
--    query.query(remote_neo4j=namespace.remote_neo4j,
--                display_neo4j=_displayable_url(namespace),
--                input_query=namespace.query,
++    query.query(remote_neo4j=args.remote_neo4j,
++                display_neo4j=_displayable_url(args),
++                input_query=args.query,
                  program_name=program_name,
--                argument_1=arg1, argument_2=arg2,
++                argument_1=arg1,
++                argument_2=arg2,
                  suppress_common=suppress_common)
  # End of functions which invoke Sextant
@@ -197,8 +197,10 @@
      """
--    argumentparser = argparse.ArgumentParser(prog='sextant', usage='sextant', description="Invoke part of the SEXTANT program")
--    subparsers = argumentparser.add_subparsers(title="subcommands")
++    ap = argparse.ArgumentParser(prog='sextant',
++                                 usage='sextant',
++                                 description="Invoke part of the SEXTANT program")
++    subparsers = ap.add_subparsers(title="subcommands")
      #set what will be defined as a "common function"
      db_api.set_common_cutoff(config.common_cutoff)
@@ -257,10 +259,9 @@
          parsers[key].add_argument('--remote-neo4j', metavar="URL",
                                    help="URL of neo4j server", type=str,
                                    default=config.remote_neo4j)
--        parsers[key].add_argument('--use-ssh-tunnel', metavar="BOOL", type=str,
--                                  help="whether to SSH into the remote server,"
--                                       "True/False",
--                                  default=str(config.use_ssh_tunnel))
++        parsers[key].add_argument('--no-ssh-tunnel',
++                                  help='Disable ssh tunnelling. Prevents program upload.',
++                                  action='store_true')
          parsers[key].add_argument('--ssh-user', metavar="NAME", type=str,
                                    help="username to use as remote SSH name",
                                    default=str(config.ssh_user))
@@ -273,207 +274,28 @@
      # parse the arguments
--    return argumentparser.parse_args()
--
--
--def _start_tunnel(local_port, remote_host, remote_port, ssh_user=''):
--    """
--    Creates an SSH port-forward.
--
--    This will result in localhost:local_port appearing to be
--    remote_host:remote_port.
--
--    :param local_port: integer port number to open at localhost
--    :param remote_host: string address of remote host (no port number)
--    :param remote_port: port to 'open' on the remote host
--    :param ssh_user: user to log in on the remote_host as
--
--    """
--
--    if not (isinstance(local_port, int) and local_port > 0):
--        raise ValueError(
--            'Local port {} must be a positive integer.'.format(local_port))
--    if not (isinstance(remote_port, int) and remote_port > 0):
--        raise ValueError(
--            'Remote port {} must be a positive integer.'.format(remote_port))
--
--    logging.debug('Starting SSH tunnel...')
--
--    # this cmd string will be .format()ed in a few lines' time
--    cmd = ['ssh']
--
--    if ssh_user:
--        # ssh -l {user} ... sets the remote login username
--        cmd += ['-l', ssh_user]
--
--    # -L localport:localhost:remoteport forwards the port
--    # -M makes SSH able to accept slave connections
--    # -S sets the location of a control socket (in this case, sextant-controller
--    #    with a unique identifier appended, just in case we run sextant twice
--    #    simultaneously), so we know how to close the port again
--    # -f goes into background; -N does not execute a remote command;
--    # -T says to remote host that we don't want a text shell.
--    cmd += ['-M',
--            '-S', 'sextantcontroller{tunnel_id}'.format(tunnel_id=local_port),
--            '-fNT',
--            '-L', '{0}:localhost:{1}'.format(local_port, remote_port),
--            remote_host]
--
--    logging.debug('Running {}'.format(' '.join(cmd)))
--
--    exit_code = subprocess.call(cmd)
--    if exit_code:
--        raise OSError('SSH setup failed with error {}'.format(exit_code))
--
--    logging.debug('SSH tunnel created.')
--
--
--def _stop_tunnel(local_port, remote_host):
--    """
--    Tear down an SSH port-forward which was previously set up with start_tunnel.
--
--    We use local_port as an identifier.
--    :param local_port: the port on localhost we are using as the entrypoint
--    :param remote_host: remote host we tunnelled into
--
--    """
--
--    logging.debug('Shutting down SSH tunnel...')
--
--    # ssh -O sends a command to the slave specified in -S
--    cmd = ['ssh',
--           '-S', 'sextantcontroller{}'.format(local_port),
--           '-O', 'exit',
--           '-q',  # for quiet
--           remote_host]
--
--    # SSH has a bug on some systems which causes it to ignore the -q flag
--    # meaning it prints "Exit request sent." to stderr.
--    # To avoid this, we grab stderr temporarily, and see if it's that string;
--    # if it is, suppress it.
--    pr = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
--    stdout, stderr = pr.communicate()
--    if stderr.rstrip() != 'Exit request sent.':
--        print(stderr, file=sys.stderr)
--    if pr.returncode == 0:
--        logging.debug('Shut down successfully.')
--    else:
--        logging.warning(
--            'SSH tunnel shutdown returned error code {}'.format(pr.returncode))
--        logging.warning(stderr)
--
--
--def _is_port_used(port):
--    """
--    Checks with the OS to see whether a port is open.
--
--    Beware: port is passed directly to the shell. Make sure it is an integer.
--    We raise ValueError if it is not.
--    :param port: integer port to check for openness
--    :return: bool(port is in use)
--
--    """
--
--    # we follow http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
--    if not (isinstance(port, int) and port > 0):
--        raise ValueError('port {} must be a positive integer.'.format(port))
--
--    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
--    try:
--        sock.bind(('127.0.0.1', port))
--    except socket.error as e:
--        if e.errno == 98:  # Address already in use
--            return True
--        raise
--
--    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
--
--    return False  # that is, the port is not used
--
--
--def _get_unused_port():
--    """
--    Returns a port number between 10000 and 50000 which is not currently open.
--
--    """
--
--    keep_going = True
--    while keep_going:
--        portnum = random.randint(10000, 50000)
--        keep_going = _is_port_used(portnum)
--    return portnum
--
++    return ap.parse_args()
  def _get_host_and_port(url):
--    """Given a URL as http://host:port, returns (host, port)."""
--    parsed = parse.urlparse(url)
--    return (parsed.hostname, parsed.port)
--
--
--def _is_localhost(host, port):
--    """
--    Checks whether a host is an alias to localhost.
--
--    Raises socket.gaierror if the host was not found.
--
--    """
--
--    addr = socket.getaddrinfo(host, port)[0][4][0]
--
--    return addr in ('127.0.0.1', '::1')
++        """Given a URL as http://host:port, returns (host, port)."""
++        parsed = parse.urlparse(url)
++        return (parsed.hostname, parsed.port)
  def main():
      args = parse_arguments()
--
--    if args.use_ssh_tunnel.lower() == 'true':
--        localport = _get_unused_port()
--
--        remotehost, remoteport = _get_host_and_port(args.remote_neo4j)
--
--        try:
--            is_loc = _is_localhost(remotehost, remoteport)
--        except socket.gaierror:
--            logging.error('Server {} not found.'.format(remotehost))
--            return
--
--        if is_loc:
--            # we are attempting to connect to localhost anyway, so we won't
--            # bother to SSH to it.
--            # There may be some ways the user can trick us into trying to SSH
--            # to localhost anyway, but this will do as a first pass.
--            # SSHing to localhost is undesirable because on my test computer,
--            # we get 'connection refused' if we try.
--            args.func(args)
--
--        else:  # we need to SSH
--            try:
--                _start_tunnel(localport, remotehost, remoteport,
--                              ssh_user=args.ssh_user)
--            except OSError as e:
--                logging.error(str(e))
--                return
--            except KeyboardInterrupt:
--                logging.info('Halting because of user interrupt.')
--                return
--
--            try:
--                args.display_neo4j = args.remote_neo4j
--                args.remote_neo4j = 'http://localhost:{}'.format(localport)
--                args.func(args)
--            except KeyboardInterrupt:
--                # this probably happened because we were running Sextant Web
--                # and Ctrl-C'ed out of it
--                logging.info('Keyboard interrupt detected. Halting.')
--                pass
--
--            finally:
--                _stop_tunnel(localport, remotehost)
--
--    else:  # no need to set up the ssh, just run sextant
--        args.func(args)
--
--
++    remotehost, remoteport = _get_host_and_port(args.remote_neo4j)
++    no_ssh_tunnel = args.no_ssh_tunnel
++    connection = None
++
++    try:
++        conn_args = (remotehost, remoteport, no_ssh_tunnel)
++        with db_api.SextantConnection(*conn_args) as connection:
++            args.func(connection, args)
++    except sshmanager.SSHConnectionError as e:
++        print(e.message)
++
++
  if __name__ == '__main__':
      main()
 === added file 'src/sextant/csvwriter.py'
 --- src/sextant/csvwriter.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/csvwriter.py	2014-10-23 12:33:12 +0000
@@ -0,0 +1,152 @@
++import logging
++
++"""
++Provide a class for writing to row-limited csv files.
++"""
++__all__ = ('CSVWriter',)
++
++
++class CSVWriter(object):
++    """
++    Write to csv files, automatically opening new ones at row maximum.
++
++    Provides a write(*args) method which will add a row to the currently open
++    csv file (internally managed) if there is room in it, otherwise close it,
++    silently open a new one and write to that.
++
++    Attributes:
++        base_path:
++            The base path of the output files - which will have a full path
++            of form "<base_path><number>.csv"
++        headers:
++            A list or tuple of strings which will be used as the column
++            headers. Attempts to write a row of data will induce a check
++            that the length of the data provided is exactly that of this
++            argument.
++        max_rows:
++            The maximum number of rows to write in each file (including the
++            header row) before opening a new file.
++
++        _fmt:
++            The format string which will be used to write a row to the csv
++            file. Of form '{},{},...,{}\n'.
++        _file:
++            The currently open file.
++        _file_count:
++            The number of files that the CSVWriter has written to. The next
++            file to be opened will have name '<base_path><_file_count>.csv'
++        _row_count:
++            The number of rows (including the header row) in the current file.
++        _total_row_count:
++            The number of rows (including the header rows) in ALL files.
++
++    """
++    # Filename fmt of output files - used with .format(base_path, number).
++    _file_fmt = '{}{}.csv'
++
++    def __init__(self, base_path, headers, max_rows):
++        """
++        Initialise the writer for writing.
++
++        Arguments:
++            base_path:
++                The base path of the output files - which will have a full path
++                of form "<base_path><number>.csv"
++            headers:
++                A list or tuple of strings which will be used as the column
++                headers. Attempts to write a row of data will induce a check
++                that the length of the data provided is exactly that of this
++                argument.
++            max_rows:
++                The maximum number of rows to write in each file (including the
++                header row) before opening a new file.
++        """
++        self.base_path = base_path
++        self.headers = headers
++        self.max_rows = max_rows
++
++        self._fmt = ','.join('{}' for h in headers) + '\n'
++
++        # The number of the file we are on and the line in it.
++        self._file = None
++        self._file_count = 0
++        self._row_count = 0
++
++        self._total_row_count = 0
++
++        self._open_new_file()
++
++    def _open_new_file(self):
++        """
++        Open a new file for editing, writing the headers in the first row.
++        """
++        self._close_file()
++
++        path = CSVWriter._file_fmt.format(self.base_path, self._file_count)
++        self._file = open(path, 'w+')
++        self._file_count += 1
++        self.write(*self.headers)
++
++    def _close_file(self):
++        """
++        Close the current file.
++
++        NOTE that this method should ALWAYS be called before attempting to read
++        from the file as it ensures that all changes have been written to disk,
++        not only buffered.
++        """
++        if self._file and not self._file.closed:
++            logging.debug('csvwriter wrote {} lines to {}'
++                          .format(self._row_count, self._file.name))
++            self._file.close()
++
++        self._row_count = 0
++
++    def write(self, *args):
++        """
++        Add a row the to current file, or to a new one if max_rows is reached.
++
++        The check against max_rows is made BEFORE writing the line.
++
++        Raises:
++            ValueError:
++                If the length of *args is not exactly the length of
++                self.headers - i.e. on attempt to write too many/too few items.
++
++        Arguments:
++            *args:
++                Strings, which will be written into the columns of the current
++                open csv file.
++        """
++        if not len(args) == len(self.headers):
++            msg = 'Attempted to write {} entries to file {} with {} columns'
++            raise ValueError(msg.format(len(args), self.base_path,
++                                        len(self.headers)))
++
++        if self._row_count == self.max_rows:
++            self._close_file()
++            self._open_new_file()
++
++        self._file.write(self._fmt.format(*args))
++        self._row_count += 1
++        self._total_row_count += 1
++
++    def file_iter(self):
++        """
++        Return an iterator over the names of the files the writer has
++        written to.
++        """
++        fmt = CSVWriter._file_fmt
++        return (fmt.format(self.base_path, i) for i in range(self._file_count))
++
++    def finish(self):
++        """
++        Flush and close the current file. If a subsequent call to self.write
++        is made, a new file will be created to contain it.
++
++        Return the number of files we have written to and the total number
++        of lines we have written.
++        """
++        self._close_file()
++        return self._file_count, self._total_row_count
++
 === modified file 'src/sextant/db_api.py'
 --- src/sextant/db_api.py	2014-09-03 14:10:07 +0000
 +++ src/sextant/db_api.py	2014-10-23 12:33:12 +0000
@@ -5,208 +5,348 @@
  # -----------------------------------------
  # API to interact with a Neo4J server: upload, query and delete programs in a DB
--__all__ = ("Validator", "AddToDatabase", "FunctionQueryResult", "Function",
++from __future__ import print_function
++
++__all__ = ("validate_query", "DBProgram", "FunctionQueryResult", "Function",
             "SextantConnection")
++from sys import stdout
++
  import re  # for validation of function/program names
  import logging
  from datetime import datetime
  import os
  import getpass
  from collections import namedtuple
--
--from neo4jrestclient.client import GraphDatabase
--import neo4jrestclient.client as client
--
++import random
++import socket
++
++import itertools
++import subprocess
++from time import time
++
++import neo4jrestclient.client as neo4jrestclient
++
++from sshmanager import SSHManager, SSHConnectionError
++from csvwriter import CSVWriter
++
++# The directory on the local machine to which csv files will be written
++# prior to copy over to the remote server.
++TMP_DIR = '/tmp/sextant'
++
++# A function is deemed 'common' if it has more than this
++# many connections.
  COMMON_CUTOFF = 10
--# a function is deemed 'common' if it has more than this
--# many connections
--
--
--class Validator():
--    """ Sanitises/checks strings, to prevent Cypher injection attacks"""
--
--    @staticmethod
--    def validate(input_):
--        """
--        Checks whether we can allow a string to be passed into a Cypher query.
--        :param input_: the string we wish to validate
--        :return: bool(the string is allowed)
--        """
--        regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')
--        return bool(regex.match(input_))
--
--    @staticmethod
--    def sanitise(input_):
--        """
--        Strips harmful characters from the given string.
--        :param input_: string to sanitise
--        :return: the sanitised string
--        """
--        return re.sub(r'[^\.\-_a-zA-Z0-9]+', '', input_)
--
--
--class AddToDatabase():
--    """Updates the database, adding functions/calls to a given program"""
--
--    def __init__(self, program_name='', sextant_connection=None,
--                 uploader='', uploader_id='', date=None):
--        """
--        Object which can be used to add functions and calls to a new program
--        :param program_name: the name of the new program to be created
--          (must already be validated against Validator)
--        :param sextant_connection: the SextantConnection to use for connections
--        :param uploader: string identifier of user who is uploading
--        :param uploader_id: string Unix user-id of logged-in user
--        :param date: string date of today
--        """
--        # program_name must be alphanumeric, to avoid injection attacks easily
--        if not Validator.validate(program_name):
--            return
++
++
++
++def set_common_cutoff(common_def):
++    """
++    Sets the number of incoming connections at which we deem a function 'common'
++    Default is 10 (which is used if this method is never called).
++    :param common_def: number of incoming connections
++    """
++    global COMMON_CUTOFF
++    COMMON_CUTOFF = common_def
++
++
++def validate_query(string):
++    """
++    Checks whether we can allow a string to be passed into a Cypher query.
++    :param string: the string we wish to validate
++    :return: bool(the string is allowed)
++    """
++    regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')
++    return bool(regex.match(string))
++
++
++class DBProgram(object):
++    """
++    Representation of a program in the database.
++
++    Provides add_function and add_call methods which locally register functions
++    and calls. The commit method uploads everything to the database.
++
++    Attributes:
++        uploader, uploader_id, program_name, date:
++            As in __init__.
++
++        _conn:
++            The SextantConnection object managing the database connection.
++        _ssh:
++            The SSHManager object belonging to the SextantConnection.
++        _db:
++            The database object belonging to the SextantConnection.
++
++        _tmp_dir:
++            The user-specific location of the local temporary directory.
++
++        func_writer:
++            A CSVWriter object which manages the csv files containing the
++            list of functions in the program.
++        call_writer:
++            A CSVWriter object which manages the csv files containing the
++            list of function calls in the program.
++
++        add_func_query:
++            A string for the cypher query used to create functions from a csv
++            file.
++        add_call_query:
++            A string for the cypher query used to create funciton calls from
++            a csv file.
++        add_program_query:
++            A string for the cypher query used to create the program node.
++    """
++
++    def __init__(self, connection, program_name, uploader, uploader_id, date):
++        """
++        Initialise the database program.
++
++        A local temporary folder is created at 'TMP_DIR-<user_name>'.
++        When functions or calls are added via the add_function/call methods,
++        they are registered in csv files which are stored in this directory.
++
++        Committing the program copies these files to the neo4j server and
++        cleans the local tmp folder.
++
++        Raises:
++            ValueError:
++                If the program_name is not alphanumeric.
++            CommandError:
++                If the command to create the temporary directory failed.
++
++        Arguments:
++            connection:
++                The SextantConnection object which manages the connection to
++                the database.
++            program_name:
++                The name to register the program under in the database. Must be
++                alphanumeric.
++            uploader:
++                The name of the user who uploaded the program.
++            uploader_id:
++                A numeric id of the user who uploaded the program.
++            date:
++                A string representing the upload date.
++        """
++        # Ensure an alphanumeric program name.
++        if not validate_query(program_name):
++            raise ValueError('program name must be alphanumeric, got: {}'
++                             .format(program_name));
++
++        self.uploader = uploader
++        self.uploader_id = uploader_id
          self.program_name = program_name
--        self.parent_database_connection = sextant_connection
--        self._functions = {}
--        self._funcs_tx = None  # transaction for uploading functions
--        self._calls_tx = None  # transaction for uploading relationships
--
--        if self.parent_database_connection:
--            # we'll locally use db for short
--            db = self.parent_database_connection._db
--
--            parent_function = db.nodes.create(name=program_name,
--                                              type='program',
--                                              uploader=uploader,
--                                              uploader_id=uploader_id,
--                                              date=date)
--            self._parent_id = parent_function.id
--
--            self._funcs_tx = db.transaction(using_globals=False, for_query=True)
--            self._calls_tx = db.transaction(using_globals=False, for_query=True)
--
--        self._connections = []
--
--    @staticmethod
--    def _get_display_name(function_name):
--        """
--        Gets the name we will display to the user for this function name.
--
--        For instance, if function_name were __libc_start_main@plt, we would
--        return ("__libc_start_main", "plt_stub"). The returned function type is
--        currently one of "plt_stub", "function_pointer" or "normal".
--
--        :param function_name: the name straight from objdump of a function
--        :return: ("display name", "function type")
--
--        """
--
--        if function_name[-4:] == "@plt":
--            display_name = function_name[:-4]
--            function_group = "plt_stub"
--        elif function_name[:20] == "_._function_pointer_":
--            display_name = function_name
--            function_group = "function_pointer"
--        else:
--            display_name = function_name
--            function_group = "normal"
--
--        return display_name, function_group
--
--    def add_function(self, function_name):
--        """
--        Adds a function to the program, ready to be sent to the remote database.
--        If the function name is already in use, this method effectively does
--          nothing and returns True.
--
--        :param function_name: a string which must be alphanumeric
--        :return: True if the request succeeded, False otherwise
--        """
--        if not Validator.validate(function_name):
--            return False
--        if self.class_contains_function(function_name):
--            return True
--
--        display_name, function_group = self._get_display_name(function_name)
--
--        query = ('START n = node({}) '
--                 'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}}) '
--                 'RETURN m.name, id(m)')
--        query = query.format(self._parent_id, function_group, display_name)
--
--        self._funcs_tx.append(query)
--
--        self._functions[function_name] = function_name
--
--        return True
--
--    def class_contains_function(self, function_to_find):
--        """
--        Checks whether we contain a function with a given name.
--        :param function_to_find: string name of the function we wish to look up
--        :return: bool(the function exists in this AddToDatabase)
--        """
--        return function_to_find in self._functions
--
--    def class_contains_call(self, function_calling, function_called):
--        """
--        Checks whether we contain a call between the two named functions.
--        :param function_calling: string name of the calling-function
--        :param function_called: string name of the called function
--        :return: bool(function_calling calls function_called in us)
--        """
--        return (function_calling, function_called) in self._connections
--
--    def add_function_call(self, fn_calling, fn_called):
--        """
--        Adds a function call to the program, ready to be sent to the database.
--        Effectively does nothing if there is already a function call between
--          these two functions.
--        Function names must be alphanumeric for easy security purposes;
--          returns False if they fail validation.
--        :param fn_calling: the name of the calling-function as a string.
--          It should already exist in the AddToDatabase; if it does not,
--          this method will create a stub for it.
--        :param fn_called: name of the function called by fn_calling.
--          If it does not exist, we create a stub representation for it.
--        :return: True if successful, False otherwise
--        """
--        if not all((Validator.validate(fn_calling),
--                    Validator.validate(fn_called))):
--            return False
--
--        if not self.class_contains_function(fn_called):
--            self.add_function(fn_called)
--        if not self.class_contains_function(fn_calling):
--            self.add_function(fn_calling)
--
--        if not self.class_contains_call(fn_calling, fn_called):
--            self._connections.append((fn_calling, fn_called))
--
--        return True
++        self.date = date
++
++        self._conn = connection
++        self._ssh = connection._ssh
++        self._db = connection._db
++
++        self._tmp_dir = '{}-{}'.format(TMP_DIR, getpass.getuser())
++
++        # Make the local tmp file - csv files will be written into here.
++        try:
++            os.makedirs(self._tmp_dir)
++        except OSError as e:
++            if e.errno == os.errno.EEXIST: # File already exists.
++                pass
++            else:
++                raise e
++
++
++        tmp_path = os.path.join(self._tmp_dir, '{}_{{}}'.format(program_name))
++
++        self.func_writer = CSVWriter(tmp_path.format('funcs'),
++                                     headers=['name', 'type'],
++                                     max_rows=5000)
++        self.call_writer = CSVWriter(tmp_path.format('calls'),
++                                     headers=['caller', 'callee'],
++                                     max_rows=5000)
++
++        # Define the queries we use to upload the functions and calls.
++        self.add_func_query = (' USING PERIODIC COMMIT 250'
++                 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
++                 ' WITH line, toInt(line.id) as lineid'
++                 ' MATCH (n:program {{name: "{}"}})'
++                 ' CREATE (n)-[:subject]->(m:func {{name: line.name,'
++                 ' id: lineid, type: line.type}})')
++
++        self.add_call_query = (' USING PERIODIC COMMIT 250'
++                 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
++                 ' MATCH (p:program {{name: "{}"}})'
++                 ' MATCH (p)-[:subject]->(n:func {{name: line.caller}})'
++                 ' USING INDEX n:func(name)'
++                 ' MATCH (p)-[:subject]->(m:func {{name: line.callee}})'
++                 ' USING INDEX m:func(name)'
++                 ' CREATE (n)-[r:calls]->(m)')
++
++        self.add_program_query = ('CREATE (p:program {{name: "{}", uploader: "{}", '
++                ' uploader_id: "{}", date: "{}",'
++                ' function_count: {}, call_count: {}}})')
++
++
++    def __enter__(self):
++        """
++        Allow DBProgram to be used as a context manager.
++        """
++        return self
++
++    def __exit__(self, etype, evalue, etrace):
++        """
++        Make sure that all files are properly closed.
++        """
++        self.func_writer.finish()
++        self.call_writer.finish()
++
++        # Propagate the error if there is one.
++        return False if etype is not None else True
++
++    def add_function(self, name, typ='normal'):
++        """
++        Add a function.
++
++        Arguments:
++            name:
++                The name of the function.
++            typ:
++                The type of the function, may be any string, but standard types
++                are:
++                    normal: we have the disassembly for this function
++                    stub:   we have the name but not the disassembly - usually
++                            an imported library function.
++                    pointer: we know only that the function exists, not its
++                            name or details.
++        """
++        self.func_writer.write(name, typ)
++
++    def add_call(self, caller, callee):
++        """
++        Add a function call.
++
++        Arguments:
++            caller:
++                The name of the function making the call.
++            callee:
++                The name of the function called.
++        """
++        self.call_writer.write(caller, callee)
++
++
++    def _copy_local_to_remote_tmp_dir(self):
++        """
++        Move local tmp files to the server ready for upload.
++
++        Return a tuple of iterators, the first over the paths on the remote
++        machine of the function files, and the second over the paths of the
++        call files.
++        """
++        print('Sending files to remote server...', end='')
++        stdout.flush()
++        remote_funcs = self._ssh.send_to_tmp_dir(self.func_writer.file_iter())
++        remote_calls = self._ssh.send_to_tmp_dir(self.call_writer.file_iter())
++        print('finished.')
++        return remote_funcs, remote_calls
++
++    def _clean_tmp_files(self, remote_paths):
++        """
++        Delete temporary files on the local and remote machine.
++
++        Arguments:
++            remote_paths:
++                A list of the paths of the remote files.
++        """
++        print('Cleaning temporary files...', end='')
++        file_paths = list(itertools.chain(self.func_writer.file_iter(),
++                                          self.call_writer.file_iter()))
++
++        for path in file_paths:
++            os.remove(path)
++
++        os.rmdir(self._tmp_dir)
++
++        try:
++            # If the parent sextant temp folder is empty, remove it.
++            os.rmdir(TMP_DIR)
++        except:
++            # There is other stuff in TMP_DIR (i.e. from other users), so
++            # leave it.
++            pass
++
++        self._ssh.remove_from_tmp_dir(remote_paths)
++
++        print('done.')
++
++    def _create_db_constraints(self):
++        """
++        Create indexes in the database on program and function names.
++
++        The program name index is a constraint, which will also guarantee the
++        uniqueness of program names.
++        """
++        # Prepare a transaction object which we use to execute cypher queries.
++        tx = self._db.transaction(using_globals=False, for_query=True)
++
++        tx.append('CREATE CONSTRAINT ON (p:program) ASSERT p.name IS UNIQUE')
++        tx.append('CREATE INDEX ON :func(name)')
++
++        # Apply the transaction.
++        tx.commit()
      def commit(self):
          """
--        Call this when you are finished with the object.
--        Changes are not synced to the remote database until this is called.
++        Insert the program into the database.
++
++        Move the local temp files created by our func_writer and call_writer
++        to the database server's temp directory. From there use cypher queries
++        to upload them into the database, before cleaning them up.
          """
--        functions = self._funcs_tx.commit()  # send off the function names
--
--        # now functions is a list of QuerySequence objects, which each have a
--        # .elements property which produces [['name', id]]
--
--        id_funcs = dict([seq.elements[0] for seq in functions])
--        logging.info('Functions uploaded. Uploading calls...')
--
--        # so id_funcs is a dict with id_funcs['name'] == id
--        for call in self._connections:
--            query = ('MATCH n WHERE id(n) = {} '
--                     'MATCH m WHERE id(m) = {} '
--                     'CREATE (n)-[:calls]->(m)')
--            query = query.format(id_funcs[self._get_display_name(call[0])[0]],
--                                 id_funcs[self._get_display_name(call[1])[0]])
--            self._calls_tx.append(query)
--
--        self._calls_tx.commit()
++        # Ensure that the most recent files are flushed and closed.
++        func_file_count, func_line_count = self.func_writer.finish()
++        call_file_count, call_line_count = self.call_writer.finish()
++
++        # Account for the header line at the top of each file.
++        func_count = func_line_count - func_file_count
++        call_count = call_line_count - call_file_count
++
++        # Get the remote path names as iterators, then make lists of them
++        # so that we can iterate over them more than once.
++        remote_f_iter, remote_c_iter = self._copy_local_to_remote_tmp_dir()
++        remote_funcs, remote_calls = map(list, (remote_f_iter, remote_c_iter))
++
++        # Create the indexes and constraints in the database.
++        self._create_db_constraints()
++
++
++        try:
++            tx = self._db.transaction(using_globals=False, for_query=True)
++
++            # Create the program node in the database.
++            tx.append(self.add_program_query.format(self.program_name, self.uploader,
++                                                    self.uploader_id, self.date,
++                                                    func_count, call_count))
++            tx.commit()
++
++            # Upload the functions first, then the calls.
++            for files, query, descr in zip((remote_funcs, remote_calls),
++                                           (self.add_func_query, self.add_call_query),
++                                           ('funcs', 'calls')):
++                start = time()
++                for i, path in enumerate(files):
++                    completed = int(100*float(i+1)/len(files))
++
++                    print('\rUploading {}: {}%'.format(descr, completed), end='')
++                    stdout.flush()
++
++                    tx.append(query.format(path, self.program_name))
++                    tx.commit()
++                end = time()
++                print(' done.')
++
++        finally:
++            # Cleanup temporary folders
++            self._clean_tmp_files(remote_funcs + remote_calls)
  class FunctionQueryResult:
@@ -219,7 +359,7 @@
          self._update_common_functions()
      def __eq__(self, other):
--        # we make a dictionary so that we can perform easy comparison
++        # We make a dictionary so that we can perform easy comparison.
          selfdict = {func.name: func for func in self.functions}
          otherdict = {func.name: func for func in other.functions}
@@ -243,20 +383,20 @@
          if rest_output is None or not rest_output.elements:
              return []
--        # how we store this is: a dict
++        # How we store this is: a dict
          #   with keys  'functionname'
          #   and values [the function object we will use,
          #               and a set of (function names this function calls),
--        #               and numeric ID of this node in the Neo4J database]
++        #               and numeric ID of this node in the Neo4J database].
          result = {}
--        # initial pass for names of functions
++        # Initial pass for names of functions.
--        # if the following assertion failed, we've probably called db.query
++        # If the following assertion failed, we've probably called db.query
          # to get it to not return client.Node objects, which is wrong.
          # we attempt to handle this a bit later; this should never arise, but
--        # we can cope with it happening in some cases, like the test suite
++        # we can cope with it happening in some cases, like the test suite.
          if type(rest_output.elements) is not list:
              logging.warning('Not a list: {}'.format(type(rest_output.elements)))
@@ -264,11 +404,12 @@
          for node_list in rest_output.elements:
              assert(isinstance(node_list, list))
              for node in node_list:
--                if isinstance(node, client.Node):
++                if isinstance(node, neo4jrestclient.Node):
                      name = node.properties['name']
                      node_id = node.id
                      node_type = node.properties['type']
--                else:  # this is the handling we mentioned earlier;
++                else:
++                    # This is the handling we mentioned earlier;
                      # we are a dictionary instead of a list, as for some
                      # reason we've returned Raw rather than Node data.
                      # We should never reach this code, but just in case.
@@ -283,7 +424,7 @@
                                  set(),
                                  node_id]
--        # end initialisation of names-dictionary
++        # End initialisation of names-dictionary.
          if self._parent_db_connection is not None:
              # This is the normal case, of extracting results from a server.
@@ -301,7 +442,7 @@
              logging.debug('exec')
              results = new_tx.execute()
--            # results is a list of query results, each of those being a list of
++            # Results is a list of query results, each of those being a list of
              # calls.
              for call_list in results:
@@ -315,7 +456,7 @@
                      # recall: set union is denoted by |
          else:
--            # we don't have a parent database connection.
++            # We don't have a parent database connection.
              # This has probably arisen because we created this object from a
              # test suite, or something like that.
              for node in rest_output.elements:
@@ -353,19 +494,10 @@
          func_list = [func for func in self.functions if func.name == name]
          return None if len(func_list) == 0 else func_list[0]
--
--def set_common_cutoff(common_def):
--    """
--    Sets the number of incoming connections at which we deem a function 'common'
--    Default is 10 (which is used if this method is never called).
--    :param common_def: number of incoming connections
--    """
--    global COMMON_CUTOFF
--    COMMON_CUTOFF = common_def
--
--
  class Function(object):
--    """Represents a function which might appear in a FunctionQueryResult."""
++    """
++    Represents a function which might appear in a FunctionQueryResult.
++    """
      def __eq__(self, other):
          funcs_i_call_list = {func.name for func in self.functions_i_call}
@@ -393,11 +525,11 @@
          self.name = function_name
          self.is_common = False
          self._number_calling_me = 0
--        # care: _number_calling_me is not automatically updated, except by
++        # Care: _number_calling_me is not automatically updated, except by
          # any invocation of FunctionQueryResult._update_common_functions.
--class SextantConnection:
++class SextantConnection(object):
      """
      RESTful connection to a remote database.
      It can be used to create/delete/query programs.
@@ -406,56 +538,214 @@
      ProgramWithMetadata = namedtuple('ProgramWithMetadata',
                                       ['uploader', 'uploader_id',
                                        'program_name', 'date',
--                                      'number_of_funcs'])
--
--    def __init__(self, url):
--        self.url = url
--        self._db = GraphDatabase(url)
--
--    def new_program(self, name_of_program):
++                                      'number_of_funcs', 'number_of_calls'])
++
++    @staticmethod
++    def _is_localhost(host, port):
++        """
++        Checks whether a host is an alias to localhost.
++
++        Raises socket.gaierror if the host was not found.
++        """
++        addr = socket.getaddrinfo(host, port)[0][4][0]
++        return addr in ('127.0.0.1', '::1')
++
++    @staticmethod
++    def _is_port_used(port):
++        """
++        Checks with the OS to see whether a port is open.
++
++        The port is tested by attempting to bind a local socket to it. It must
++        be a positive integer; we raise ValueError if it is not.
++        :param port: integer port to check for openness
++        :return: bool(port is in use)
++        """
++        result = False
++
++        # We follow:
++        # http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
++        if not (isinstance(port, int) and port > 0):
++            raise ValueError('port {} must be a positive integer.'.format(port))
++
++        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
++        try:
++            sock.bind(('127.0.0.1', port))
++        except socket.error as e:
++            if e.errno == os.errno.EADDRINUSE:
++                result = True
++            else:
++                raise
++
++        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
++
++        return result  # True if the port is already in use, False otherwise
++
++    @staticmethod
++    def _get_unused_port():
++        """
++        Returns a port number between 10000 and 50000 which is not currently open.
++        """
++
++        keep_going = True
++        while keep_going:
++            portnum = random.randint(10000, 50000)
++            keep_going = SextantConnection._is_port_used(portnum)
++        return portnum
++
++
++    def __enter__(self):
++        return self
++
++    def __exit__(self, etype, evalue, etrace):
++        self.close()
++        return False if etype is not None else True
++
++
++    def __init__(self, remotehost, remoteport, no_ssh_tunnel=False):
++        """
++        Initialise the database and ssh connections.
++
++        Arguments:
++            remotehost:
++                The remote host name to connect to.
++            remoteport:
++                The port number to connect to on the remote host.
++            no_ssh_tunnel:
++                Disables the SSHManager if True. Prevents program upload.
++        """
++
++        self.remote_host = remotehost
++        self.remote_port = remoteport
++
++
++        self._no_ssh_tunnel = no_ssh_tunnel
++        self._ssh = None
++        self._db = None
++
++        self.open()
++
++    def open(self):
++        local_port = SextantConnection._get_unused_port()
++        is_localhost = SextantConnection._is_localhost(self.remote_host, self.remote_port)
++
++        if self._no_ssh_tunnel and not is_localhost:
++            raise SSHConnectionError('Cannot connect to the remote database '
++                                     'without an ssh connection.')
++        else:
++            # Either we are making an ssh tunnel or we are contacting localhost.
++            self._ssh = SSHManager(local_port,
++                                   self.remote_host,
++                                   self.remote_port,
++                                   is_localhost=is_localhost)
++
++            port = self.remote_port if is_localhost else local_port
++            url = 'http://localhost:{}'.format(port)
++
++        self._db = neo4jrestclient.GraphDatabase(url)
++
++    def close(self):
++        """
++        Close the ssh connection to clean up its resources.
++        """
++        if self._ssh:
++            self._ssh.close()
++
++    def new_program(self, program_name):
          """
          Request that the remote database create a new program with the given name.
          This procedure will create a new program remotely; you can manipulate
--          that program using the returned AddToDatabase object.
++          that program using the returned DBProgram object.
          The name can appear in the database already, but this is not recommended
            because then delete_program will not know which to delete. Check first
            using self.check_program_exists.
--        The name specified must pass Validator.validate()ion; this is a measure
++        The name specified must pass validate_query(); this is a measure
            to prevent Cypher injection attacks.
--        :param name_of_program: string program name
--        :return: AddToDatabase instance if successful
++        :param program_name: string program name
++        :return: DBProgram instance if successful
          """
--        if not Validator.validate(name_of_program):
--            raise ValueError(
--                "{} is not a valid program name".format(name_of_program))
++        if not validate_query(program_name):
++            raise ValueError("{} is not a valid program name"
++                             .format(program_name))
          uploader = getpass.getuser()
          uploader_id = os.getuid()
--
--        return AddToDatabase(sextant_connection=self,
--                             program_name=name_of_program,
--                             uploader=uploader, uploader_id=uploader_id,
--                             date=str(datetime.now()))
--
--    def delete_program(self, name_of_program):
++        timestr = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
++
++        return DBProgram(self, program_name, uploader,
++                         uploader_id, date=timestr)
++
++    def delete_program(self, program_name):
          """
          Request that the remote database delete a specified program.
--        :param name_of_program: a string which must be alphanumeric only
++        :param program_name: a string which must be alphanumeric only
          :return: bool(request succeeded)
          """
--        if not Validator.validate(name_of_program):
--            return False
--
--        q = """MATCH (n) WHERE n.name= "{}" AND n.type="program"
--        OPTIONAL MATCH (n)-[r]-(b) OPTIONAL MATCH (b)-[rel]-()
--        DELETE  b,rel DELETE n, r""".format(name_of_program)
--
--        self._db.query(q)
++        if not program_name in self.get_program_names():
++            print('No program `{}` in the database'.format(program_name))
++            return True
++        else:
++            print('Deleting `{}` from the database. '
++                  'This may take some time for larger programs.'
++                  .format(program_name))
++
++        start = time()
++        tx = self._db.transaction(using_globals=False, for_query=True)
++
++        count_query = (' MATCH (p:program {{name: "{}"}})'
++                       ' RETURN p.function_count, p.call_count'
++                       .format(program_name))
++
++        tx.append(count_query)
++        func_count, call_count = tx.commit()[0].elements[0]
++
++        del_call_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
++                          '-[:subject]->(f:func)-[c:calls]->()'
++                          ' WITH c LIMIT 5000 DELETE c RETURN count(distinct(c))'
++                          .format(program_name))
++
++        del_func_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
++                          '-[s:subject]->(f:func)'
++                          ' WITH s, f LIMIT 5000 DELETE s, f RETURN count(f)'
++                          .format(program_name))
++
++        del_prog_query = ('MATCH (p:program {{name: "{}"}}) DELETE p'
++                          .format(program_name))
++
++        # Delete calls first, a node may not be deleted until all relationships
++        # referencing it are deleted.
++        for count, query, descr in zip((call_count, func_count),
++                                       (del_call_query, del_func_query),
++                                       ('calls', 'funcs')):
++            # Change tracks whether the last delete did anything. We would
++            # like to use: while done < count: ..., but if the program has
++            # already been partially deleted then this will never terminate.
++            # Furthermore, if there are no functions or no calls, the while
++            # loop will be appropriately skipped.
++            change = count
++            done = 0
++            while change:
++                completed = int(100 * float(done)/count)
++                print('\rDeleting {}: {}%'.format(descr, completed), end='')
++                stdout.flush()
++
++                tx.append(query)
++                change = tx.commit()[0].elements[0][0]
++                done += change
++            if done:
++                print(' done.')
++
++        # Delete the program node.
++        tx.append(del_prog_query)
++        tx.commit()
++
++        end = time()
++        print('Finished in {:.2f}s.'.format(end - start))
          return True
--    def _execute_query(self, prog_name='', query=''):
++
++    def _execute_query(self, prog_name, query):
          """
          Executes a Cypher query against the remote database.
          Note that this returns a FunctionQueryResult, so is unsuitable for any
@@ -468,7 +758,7 @@
          :param query: verbatim query we wish the server to execute
          :return: a FunctionQueryResult corresponding to the server's output
          """
--        rest_output = self._db.query(query, returns=client.Node)
++        rest_output = self._db.query(query, returns=neo4jrestclient.Node)
          return FunctionQueryResult(parent_db=self._db,
                                     program_name=prog_name,
@@ -481,12 +771,11 @@
            method which requires a program-name input.
          :return: a list of function-name strings.
          """
--        q = """MATCH (n) WHERE n.type = "program" RETURN n.name"""
++        q = 'MATCH (n:program) RETURN n.name'
          program_names = self._db.query(q, returns=str).elements
--        result = [el[0] for el in program_names]
++        return set(el[0] for el in program_names)
--        return set(result)
      def programs_with_metadata(self):
          """
@@ -498,27 +787,28 @@
          """
--        q = ("MATCH (base) WHERE base.type = 'program' "
--             "MATCH (base)-[:subject]->(n)"
--             "RETURN base.uploader, base.uploader_id, base.name, base.date, count(n)")
++        q = (' MATCH (p:program)'
++             ' RETURN p.uploader, p.uploader_id, p.name, p.date,'
++             ' p.function_count, p.call_count')
          result = self._db.query(q)
          return {self.ProgramWithMetadata(*res) for res in result}
      def check_program_exists(self, program_name):
          """
          Execute query to check whether a program with the given name exists.
--        Returns False if the program_name fails validation against Validator.
++        Returns False if the program_name fails validation (i.e. is possibly
++        unsafe as a string in a cypher query).
          :return: bool(the program exists in the database).
          """
--        if not Validator.validate(program_name):
++        if not validate_query(program_name):
              return False
--        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
--             "RETURN count(base)").format(program_name)
++        q = ('MATCH (p:program {{name: "{}"}}) RETURN p LIMIT 1'
++             .format(program_name))
--        result = self._db.query(q, returns=int)
--        return result.elements[0][0] > 0
++        result = self._db.query(q, returns=neo4jrestclient.Node)
++        return bool(result)
      def check_function_exists(self, program_name, function_name):
          """
@@ -529,18 +819,18 @@
          :param function_name: string name of the function to check for existence
          :return: bool(names validate correctly, and function exists in program)
          """
--        if not self.check_program_exists(program_name):
--            return False
--
--        if not Validator.validate(program_name):
--            return False
--
--        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program'"
--             "MATCH (base)-[r:subject]->(m) WHERE m.name = '{}'"
--             "RETURN count(m)").format(program_name, function_name)
--
--        result = self._db.query(q, returns=int)
--        return result.elements[0][0] > 0
++        if not validate_query(program_name):
++            return False
++
++        pmatch = '(:program {{name: "{}"}})'.format(program_name)
++        fmatch = '(f:func {{name: "{}"}})'.format(function_name)
++        # be explicit about index usage
++        q = (' MATCH {}-[:subject]->{} USING INDEX f:func(name)'
++             ' RETURN f LIMIT 1'.format(pmatch, fmatch))
++
++        # result will be an empty list if the function was not found
++        result = self._db.query(q, returns=neo4jrestclient.Node)
++        return bool(result)
      def get_function_names(self, program_name):
          """
@@ -552,12 +842,11 @@
            a set of function-name strings otherwise.
          """
--        if not self.check_program_exists(program_name):
--            return None
++        if not validate_query(program_name):
++            return set()
--        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
--             "MATCH (base)-[r:subject]->(m) "
--             "RETURN  m.name").format(program_name)
++        q = (' MATCH (:program {{name: "{}"}})-[:subject]->(f:func)'
++             ' RETURN f.name').format(program_name)
          return {func[0] for func in self._db.query(q)}
      def get_all_functions_called(self, program_name, function_calling):
@@ -570,16 +859,13 @@
          :return: FunctionQueryResult, maximal subgraph rooted at function_calling
          """
--        if not self.check_program_exists(program_name):
--            return None
--
          if not self.check_function_exists(program_name, function_calling):
              return None
--        q = """MATCH (base) WHERE base.name = '{}' ANd base.type = 'program'
--            MATCH (base)-[:subject]->(m) WHERE m.name='{}'
--            MATCH (m)-[:calls*]->(n)
--            RETURN distinct n, m""".format(program_name, function_calling)
++        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
++             ' USING INDEX f:func(name)'
++             ' MATCH (f)-[:calls*]->(g) RETURN distinct f, g'
++             .format(program_name, function_calling))
          return self._execute_query(program_name, q)
@@ -593,16 +879,13 @@
          :return: FunctionQueryResult, maximal connected subgraph with leaf function_called
          """
--        if not self.check_program_exists(program_name):
--            return None
--
          if not self.check_function_exists(program_name, function_called):
              return None
--        q = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
--            MATCH (base)-[r:subject]->(m) WHERE m.name='{}'
--            MATCH (n)-[:calls*]->(m) WHERE n.name <> '{}'
--            RETURN distinct n , m"""
++        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func {{name: "{}"}})'
++             ' USING INDEX g:func(name)'
++             ' MATCH (f)-[:calls*]->(g) WHERE f.name <> "{}"'
++             ' RETURN distinct f , g')
          q = q.format(program_name, function_called, program_name)
          return self._execute_query(program_name, q)
@@ -628,12 +911,14 @@
          if not self.check_function_exists(program_name, function_calling):
              return None
--        q = r"""MATCH (pr) WHERE pr.name = '{}' AND pr.type = 'program'
--                MATCH p=(start {{name: "{}" }})-[:calls*]->(end {{name:"{}"}})
--                  WHERE (pr)-[:subject]->(start)
--                WITH DISTINCT nodes(p) AS result
--                UNWIND result AS answer
--                RETURN answer"""
++        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(start:func {{name: "{}"}})'
++             ' USING INDEX start:func(name)'
++             ' MATCH (p)-[:subject]->(end:func {{name: "{}"}})'
++             ' USING INDEX end:func(name)'
++             ' MATCH path=(start)-[:calls*]->(end)'
++             ' WITH DISTINCT nodes(path) AS result'
++             ' UNWIND result AS answer'
++             ' RETURN answer')
          q = q.format(program_name, function_calling, function_called)
          return self._execute_query(program_name, q)
@@ -648,11 +933,9 @@
          if not self.check_program_exists(program_name):
              return None
--        query = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
--                MATCH (base)-[subject:subject]->(m)
--                RETURN DISTINCT (m)""".format(program_name)
--
--        return self._execute_query(program_name, query)
++        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func)'
++             ' RETURN (f)'.format(program_name))
++        return self._execute_query(program_name, q)
      def get_shortest_path_between_functions(self, program_name, func1, func2):
          """
@@ -671,9 +954,11 @@
          if not self.check_function_exists(program_name, func2):
              return None
--        q = """MATCH (func1 {{ name:"{}" }}),(func2 {{ name:"{}" }}),
--            p = shortestPath((func1)-[:calls*]->(func2))
--            UNWIND nodes(p) AS ans
--            RETURN ans""".format(func1, func2)
++        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
++             ' USING INDEX f:func(name)'
++             ' MATCH (p)-[:subject]->(g:func {{name: "{}"}})'
++             ' MATCH path=shortestPath((f)-[:calls*]->(g))'
++             ' UNWIND nodes(path) AS ans'
++             ' RETURN ans'.format(program_name, func1, func2))
          return self._execute_query(program_name, q)
 === modified file 'src/sextant/export.py'
 --- src/sextant/export.py	2014-09-04 09:46:18 +0000
 +++ src/sextant/export.py	2014-10-23 12:33:12 +0000
@@ -46,7 +46,7 @@
          font_name = "Helvetica"
          for func in program.get_functions():
--            if func.type == "plt_stub":
++            if func.type == "stub":
                  output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name)
              elif func.type == "function_pointer":
                  output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name)
@@ -108,7 +108,7 @@
          for func in program.get_functions():
              display_func = ProgramConverter.get_display_name(func)
--            if func.type == "plt_stub":
++            if func.type == "stub":
                  colour = "#ff00ff"
              elif func.type == "function_pointer":
                  colour = "#99ffff"
@@ -175,4 +175,4 @@
                  output_str += '<edge source="{}" target="{}"> <data key="calls">1</data> </edge>\n'.format(func.name, callee.name)
          output_str += '</graph>\n</graphml>'
--        return output_str
 \ No newline at end of file
++        return output_str
 === modified file 'src/sextant/objdump_parser.py' (properties changed: -x to +x)
 --- src/sextant/objdump_parser.py	2014-08-18 13:00:53 +0000
 +++ src/sextant/objdump_parser.py	2014-10-23 12:33:12 +0000
@@ -1,273 +1,313 @@
--# -----------------------------------------
--# Sextant
--# Copyright 2014, Ensoft Ltd.
--# Author: Patrick Stevens
--# -----------------------------------------
--
--#!/usr/bin/python3
--
--import re
++#!/usr/bin/python
  import argparse
--import os.path
  import subprocess
  import logging
--
--class ParsedObject():
--    """
--    Represents a function as parsed from an objdump disassembly.
--    Has a name (which is the verbatim name like '__libc_start_main@plt'),
--        a position (which is the virtual memory location in hex, like '08048320'
--                    extracted from the dump),
--        and a canonical_position (which is the virtual memory location in hex
--                                  but stripped of leading 0s, so it should be a
--                                  unique id).
--    It also has a list what_do_i_call of ParsedObjects it calls using the
--      assembly keyword 'call'.
--    It has a list original_code of its assembler code, too, in case it's useful.
--    """
--
--    @staticmethod
--    def get_canonical_position(position):
--        return position.lstrip('0')
--
--    def __eq__(self, other):
--        return self.name == other.name
--
--    def __init__(self, input_lines=None, assembler_section='', function_name='',
--                 ignore_function_pointers=True, function_pointer_id=None):
--        """
--        Create a new ParsedObject given the definition-lines from objdump -S.
--        A sample first definition-line is '08048300 <__gmon_start__@plt>:\n'
--         but this method
--         expects to see the entire definition eg
--
--080482f0 <puts@plt>:
-- 80482f0:	ff 25 00 a0 04 08    	jmp    *0x804a000
-- 80482f6:	68 00 00 00 00       	push   $0x0
-- 80482fb:	e9 e0 ff ff ff       	jmp    80482e0 <_init+0x30>
--
--          We also might expect assembler_section, which is for instance '.init'
--            in 'Disassembly of section .init:'
--          function_name is used if we want to give this function a custom name.
--          ignore_function_pointers=True will pretend that calls to (eg) *eax do
--            not exist; setting to False makes us create stubs for those calls.
--          function_pointer_id is only used internally; it refers to labelling
--            of function pointers if ignore_function_pointers is False. Each
--            stub is given a unique numeric ID: this parameter tells init where
--            to start counting these IDs from.
--
--        """
--        if input_lines is None:
--            # get around Python's inability to pass in empty lists by value
--            input_lines = []
--
--        self.name = function_name or re.search(r'<.+>', input_lines[0]).group(0).strip('<>')
--        self.what_do_i_call = []
--        self.position = ''
--
--        if input_lines:
--            self.position = re.search(r'^[0-9a-f]+', input_lines[0]).group(0)
--            self.canonical_position = ParsedObject.get_canonical_position(self.position)
--            self.assembler_section = assembler_section
--            self.original_code = input_lines[1:]
++"""
++Provide a parser class to extract functions and calls from an objdump file,
++and a way to generate such a file from an object file.
++"""
++__all__ = ('Parser', 'run_objdump', 'FileNotFoundError')
++
++
++class FileNotFoundError(Exception):
++    """
++    Raised by Parser when the file it was asked to parse cannot be opened.
++    """
++
++
++class Parser(object):
++    """
++    Extract functions and calls from an object file or an objdump output file.
++
++    Only the specified sections of the disassembled code will be parsed.
++
++    Attributes:
++        path:
++            Set to file_path in __init__.
++        _file:
++            Set to file_object in __init__.
++        sections:
++            Initialised by taking the sections argument to __init__ and
++            converting it to a set.
++        ignore_ptrs:
++            Set to ignore_ptrs in __init__.
++
++        section_count:
++            The number of sections that have been parsed.
++        function_count:
++            The number of functions that have been parsed.
++        call_count:
++            The number of function calls that have been parsed.
++        function_ptr_count:
++            The number of function pointers that have been detected.
++        _known_stubs:
++            A set of the names of functions with type 'stub' that have been
++            parsed - used to avoid registering a stub multiple times.
++
++    """
++    def __init__(self, file_path, file_object=None,
++                 sections=None, ignore_ptrs=False,
++                 add_function=None, add_call=None,
++                 started=None, finished=None):
++        """
++        Initialise the parser object.
++
++        Raises:
++            FileNotFoundError:
++                If file_object was not provided and file_path couldn't be
++                opened.
++
++        Arguments:
++            file_path:
++                The path of the file being parsed. It is opened for reading
++                only when file_object is not provided.
++            file_object:
++                An already-open file object (providing
++                'for line in file_object') to parse instead of opening
++                file_path, or None to have file_path opened.
++            sections:
++                A list of the names of the disassembly sections to parse. An
++                empty list (or None) will result in all sections being parsed.
++            ignore_ptrs:
++                If True, calls to function pointers will be ignored during parsing.
++            add_function:
++                A function to call when a function is parsed. Takes:
++                    name: name of the parsed function
++                    type: type of the parsed function
++            add_call:
++                A function to call when a function call is parsed. Takes:
++                    caller: name of the calling function
++                    callee: name of the called function
++            started:
++                A function to call when the parse begins. Takes:
++                    parser: the Parser instance which has just begun parsing.
++            finished:
++                A function to call when the parse completes. Takes:
++                    parser: the Parser instance which has just finished parsing.
++                e.g. if add_function/call have been set to write into files,
++                then finished may be set to properly flush and close them.
++
++        Any callback left as None falls back to a default that prints to
++        stdout.
++        """
++        self.path = file_path
++        # _open_file raises FileNotFoundError itself; let it propagate.
++        self._file = file_object or self._open_file(file_path)
++
++        self.sections = set(sections or [])
++        self.ignore_ptrs = ignore_ptrs
++
++        self.section_count = 0
++        self.function_count = 0
++        self.call_count = 0
++        self.function_ptr_count = 0
++
++        # Avoid adding duplicate function stubs (as these are detected from
++        # function calls so may be repeated).
++        self._known_stubs = set()
++
++        # By default print information to stdout.
++        def print_func(name, typ):
++            print('func {:25}{}'.format(name, typ))
++
++        def print_call(caller, callee):
++            print('call {:25}{:25}'.format(caller, callee))
++
++        def print_started(parser):
++            print('parse started: {}[{}]'.format(parser.path, ', '.join(parser.sections)))
++
++        def print_finished(parser):
++            print('parsed {} functions and {} calls'.format(parser.function_count, parser.call_count))
++
++        self.add_function = add_function or print_func
++        self.add_call = add_call or print_call
++        # started/finished are stored as zero-argument callables which hand
++        # this Parser instance to the supplied (or default) callback.
++        self.started = lambda: (started or print_started)(self)
++        self.finished = lambda: (finished or print_finished)(self)
++
++
++    def _get_function_ptr_name(self):
++        """
++        Generate the next unused name for a function pointer.
++
++        Names have the form 'func_ptr_<n>', where <n> is the value of
++        function_ptr_count before this call; the counter is then incremented.
++        """
++        index = self.function_ptr_count
++        self.function_ptr_count = index + 1
++        return 'func_ptr_{}'.format(index)
++
++    def _add_function_normal(self, name):
++        """
++        Report a function whose definition appears in the disassembly.
++
++        Passes (name, 'normal') to the add_function callback, then counts it.
++        """
++        kind = 'normal'
++        self.add_function(name, kind)
++        self.function_count = self.function_count + 1
++
++    def _add_function_ptr(self, name):
++        """
++        Report a function pointer under the given name.
++
++        Passes (name, 'pointer') to the add_function callback, then counts it.
++        """
++        # NOTE(review): export.py compares func.type against
++        # "function_pointer" - confirm that 'pointer' is translated somewhere
++        # before it reaches the exporters.
++        kind = 'pointer'
++        self.add_function(name, kind)
++        self.function_count = self.function_count + 1
++
++    def _add_function_stub(self, name):
++        """
++        Add a function stub - a function known by name only.
++
++        A stub is passed to add_function (with type 'stub') only the first
++        time its name is seen; repeats are ignored and are not counted.
++        """
++        if name in self._known_stubs:
++            return
++
++        self._known_stubs.add(name)
++        self.add_function(name, 'stub')
++        self.function_count = self.function_count + 1
++
++    def _add_call(self, caller, callee):
++        """
++        Report a call made by caller to callee.
++
++        Passes the pair to the add_call callback, then counts the call.
++        """
++        self.add_call(caller, callee)
++        self.call_count = self.call_count + 1
++
++    def parse(self):
++        """
++        Parse self._file.
++        """
++        self.started()
++
++        if self._file is not None:
++            in_section = False          # if we are in one of self.sections
++            current_function = None     # track the caller for function calls
++
++            for line in self._file:
++                if line.startswith('Disassembly'):
++                    # 'Disassembly of section <name>:\n'
++                    section = line.split(' ')[-1].rstrip(':\n')
++                    in_section = section in self.sections if self.sections else True
++                    if in_section:
++                        self.section_count += 1
++
++                elif in_section:
++                    if line.endswith('>:\n'):
++                        # '<address> <<function_identifier>>:\n'
++                        # with <function_identifier> of form:
++                        # <function_name>[@plt]
++                        function_identifier = line.split('<')[-1].split('>')[0]
++
++                        if '@' in function_identifier:
++                            current_function = function_identifier.split('@')[0]
++                            self._add_function_stub(current_function)
++                        else:
++                            current_function = function_identifier
++                            self._add_function_normal(current_function)
++
++                    elif 'call ' in line or 'callq ' in line:
++                        # WHITESPACE to prevent picking up function names
++                        # containing 'call'
++
++                        # '<hex>: <hex> [l]call [hex] <callee_info>\n'
++                        callee_info = line.split(' ')[-1].rstrip('\n')
++
++                        # Where <callee_info> is either
++                        #  1) '*(<register>)'           call to a fn pointer
++                        #  2) '$<hex>,$<hex>'           lcall to a fn pointer
++                        #  3) '<<function_identifier>>' call to a named function
++                        if '<' in callee_info and '>' in callee_info:
++                            # call to a normal or stub function
++                            # '<function_identifier>' is of form <name>[@/-/+]<...>
++                            # from which we extract name
++                            callee_is_ptr = False
++                            function_identifier = callee_info.lstrip('<').rstrip('>\n')
++                            if '@' in function_identifier:
++                                callee = function_identifier.split('@')[0]
++                                self._add_function_stub(callee)
++                            else:
++                                callee = function_identifier.split('-')[-1].split('+')[0]
++                                # Do not add this fn now - it is a normal func
++                                # so we know about it from elsewhere.
++
++                        else:
++                            # Some kind of function pointer call.
++                            callee_is_ptr = True
++                            if not self.ignore_ptrs:
++                                callee = self._get_function_ptr_name()
++                                self._add_function_ptr(callee)
++
++                        # Add the call.
++                        if not (self.ignore_ptrs and callee_is_ptr):
++                            self._add_call(current_function, callee)
--            call_regex_compiled = (ignore_function_pointers and re.compile(r'\tcall. +[^\*]+\n')) or re.compile(r'\tcall. +.+\n')
--
--            lines_where_i_call = [line for line in input_lines if call_regex_compiled.search(line)]
--
--            if not ignore_function_pointers and not function_pointer_id:
--                function_pointer_id = [1]
--
--            for line in lines_where_i_call:
--                # we'll catch call and callq for the moment
--                called = (call_regex_compiled.search(line).group(0))[8:].lstrip(' ').rstrip('\n')
--                if called[0] == '*' and ignore_function_pointers == False:
--                    # we have a function pointer, which we'll want to give a distinct name
--                    address = '0'
--                    name = '_._function_pointer_' + str(function_pointer_id[0])
--                    function_pointer_id[0] += 1
--
--                    self.what_do_i_call.append((address, name))
--
--                else: # we're not on a function pointer
--                    called_split = called.split(' ')
--                    if len(called_split) == 2:
--                        address, name = called_split
--                        name = name.strip('<>')
--                        # we still want to remove address offsets like +0x09 from the end of name
--                        match = re.match(r'^.+(?=\+0x[a-f0-9]+$)', name)
--                        if match is not None:
--                            name = match.group(0)
--                        self.what_do_i_call.append((address, name.strip('<>')))
--                    else:  # the format of the "what do i call" is not recognised as a name/address pair
--                        self.what_do_i_call.append(tuple(called_split))
--
--    def __str__(self):
--        if self.position:
--            return 'Memory address ' + self.position + ' with name ' + self.name + ' in section ' + str(
--                self.assembler_section)
++            self.finished()
++
++            self._file.close()
++            result = True
          else:
--            return 'Name ' + self.name
--
--    def __repr__(self):
--        out_str = 'Disassembly of section ' + self.assembler_section + ':\n\n' + self.position + ' ' + self.name + ':\n'
--        return out_str + '\n'.join([' ' + line for line in self.original_code])
--
--
--class Parser:
--    # Class to manipulate the output of objdump
--
--    def __init__(self, input_file_location='', file_contents=None, sections_to_view=None, ignore_function_pointers=False):
--        """Creates a new Parser, given an input file path. That path should be an output from objdump -D.
--        Alternatively, supply file_contents, as a list of each line of the objdump output. We expect newlines
--         to have been stripped from the end of each of these lines.
--         sections_to_view makes sure we only use the specified sections (use [] for 'all sections' and None for none).
--        """
--        if file_contents is None:
--            file_contents = []
--
--        if sections_to_view is None:
--            sections_to_view = []
--
--        if input_file_location:
--            file_to_read = open(input_file_location, 'r')
--            self.source_string_list = [line for line in file_to_read]
--            file_to_read.close()
--        elif file_contents:
--            self.source_string_list = [string + '\n' for string in file_contents]
--        self.parsed_objects = []
--        self.sections_to_view = sections_to_view
--        self.ignore_function_pointers = ignore_function_pointers
--        self.pointer_identifier = [1]
--
--    def create_objects(self):
--        """ Go through the source_string_list, getting object names (like 'main') along with the corresponding
--         definitions, and put them into parsed_objects """
--        if self.sections_to_view is None:
--            return
--
--        is_in_section = lambda name: self.sections_to_view == [] or name in self.sections_to_view
--
--        parsed_objects = []
--        current_object = []
--        current_section = ''
--        regex_compiled_addr_and_name = re.compile(r'[0-9a-f]+ <.+>:\n')
--        regex_compiled_section = re.compile(r'section .+:\n')
--
--        for line in self.source_string_list[4:]:  # we bodge, since the file starts with a little bit of guff
--            if regex_compiled_addr_and_name.match(line):
--                # we are a starting line
--                current_object = [line]
--            elif re.match(r'Disassembly of section', line):
--                current_section = regex_compiled_section.search(line).group(0).lstrip('section ').rstrip(':\n')
--                current_object = []
--            elif line == '\n':
--                # we now need to stop parsing the current block, and store it
--                if len(current_object) > 0 and is_in_section(current_section):
--                    parsed_objects.append(ParsedObject(input_lines=current_object, assembler_section=current_section,
--                                                       ignore_function_pointers=self.ignore_function_pointers,
--                                                       function_pointer_id=self.pointer_identifier))
--            else:
--                current_object.append(line)
--
--        # now we should be done. We assumed that blocks begin with r'[0-9a-f]+ <.+>:\n' and end with a newline.
--        # clear duplicates:
--
--        self.parsed_objects = []
--        for obj in parsed_objects:
--            if obj not in self.parsed_objects: # this is so that if we jump into the function at an offset,
--                # we still register it as being the old function, not some new function at a different address
--                # with the same name
--                self.parsed_objects.append(obj)
--
--                # by this point, each object contains a self.what_do_i_call which is a list of tuples
--                #  ('address', 'name') if the address and name were recognised, or else (thing1, thing2, ...)
--                # where the instruction was call thing1 thing2 thing3... .
--
--    def object_lookup(self, object_name='', object_address=''):
--        """Returns the object with name object_name or address object_address (at least one must be given).
--        If objects with the given name or address
--        are not found, returns None."""
--
--        if object_name == '' and object_address == '':
--            return None
--
--        trial_obj = self.parsed_objects
--
--        if object_name != '':
--            trial_obj = [obj for obj in trial_obj if obj.name == object_name]
--
--        if object_address != '':
--            trial_obj = [obj for obj in trial_obj if
--                         obj.canonical_position == ParsedObject.get_canonical_position(object_address)]
--
--        if len(trial_obj) == 0:
--            return None
--
--        return trial_obj
--
--def get_parsed_objects(filepath, sections_to_view, not_object_file, readable=False, ignore_function_pointers=False):
--    if sections_to_view is None:
--        sections_to_view = []  # because we use None for "no sections"; the intent of not providing any sections
--        # on the command line was to look at all sections, not none
--
--    # first, check whether the given file exists
--    if not os.path.isfile(filepath):
--        # we'd like to use FileNotFoundError, but we might be running under
--        # Python 2, which doesn't have it.
--        raise IOError(filepath + 'is not found.')
--
--    #now the file should exist
--    if not not_object_file:  #if it is something we need to run through objdump first
--        #we need first to run the object file through objdump
--
--        objdump_file_contents = subprocess.check_output(['objdump', '-D', filepath])
--        objdump_str = objdump_file_contents.decode('utf-8')
--
--        p = Parser(file_contents=objdump_str.split('\n'), sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)
--    else:
++            result = False
++
++        return result
++
++    def _open_file(self, path):
++        """
++        Open and return the file at path.
++
++        Raises:
++            FileNotFoundError:
++                If the file fails to open.
++
++        Arguments:
++            path:
++                The path of the file to open.
++        """
          try:
--            p = Parser(input_file_location=filepath, sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)
--        except UnicodeDecodeError:
--            logging.error('File could not be parsed as a string. Did you mean to supply --object-file?')
--            return False
--
--    if readable: # if we're being called from the command line
--        print('File read; beginning parse.')
--    #file is now read, and we start parsing
--
--    p.create_objects()
--    return p.parsed_objects
++            result = open(path)
++        # Only environment errors carry the strerror used in the message
++        # below; anything else (e.g. a TypeError for a bad path argument)
++        # propagates unchanged instead of being masked by an AttributeError.
++        except EnvironmentError as e:
++            raise FileNotFoundError("parser failed to open `{}`: {}".format(path, e.strerror))
++
++        return result
++
++
++def run_objdump(input_file):
++    """
++    Run the objdump command on the file with the given path.
++
++    The disassembly is piped through egrep so that only section headers,
++    function headers and call instructions are kept.
++
++    Return the input file path and a file object representing the result of
++    the objdump.
++
++    Arguments:
++        input_file:
++            The path of the file to run objdump on.
++
++    """
++    # A single section can be specified for parsing with the -j flag,
++    # but it is not obviously possible to parse multiple sections like this.
++    objdump = subprocess.Popen(['objdump', '-d', input_file, '--no-show-raw-insn'],
++                               stdout=subprocess.PIPE)
++    egrep = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$'],
++                             stdin=objdump.stdout, stdout=subprocess.PIPE)
++    # Drop this process's copy of the pipe's read end so that objdump
++    # receives SIGPIPE if egrep exits before consuming all of its output.
++    objdump.stdout.close()
++    return input_file, egrep.stdout
++
  def main():
--    argumentparser = argparse.ArgumentParser(description="Parse the output of objdump.")
--    argumentparser.add_argument('--filepath', metavar="FILEPATH", help="path to input file", type=str, nargs=1)
--    argumentparser.add_argument('--not-object-file', help="import text objdump output instead of the compiled file", default=False,
--                                action='store_true')
--    argumentparser.add_argument('--sections-to-view', metavar="SECTIONS",
--                                help="sections of disassembly to view, like '.text'; leave blank for 'all'",
--                                type=str, nargs='*')
--    argumentparser.add_argument('--ignore-function-pointers', help='whether to skip parsing calls to function pointers', action='store_true', default=False)
--
--    parsed = argumentparser.parse_args()
++    """
++    Run the parser from the command line.
++
++    The path of the target file, the sections to view and the ignore function
++    pointers flag are set with command line arguments.
++    """
++    ap = argparse.ArgumentParser(description="Parse the output of objdump.")
++    # --filepath is indexed unconditionally below, so make argparse reject a
++    # missing value instead of failing later with a TypeError.
++    ap.add_argument('--filepath', metavar="FILEPATH", required=True,
++                    help="path to input file", type=str, nargs=1)
++
++    ap.add_argument('--sections-to-view', metavar="SECTIONS",
++                    help="disassembly sections to view, eg '.text'; leave blank for 'all'",
++                    type=str, nargs='*')
++    ap.add_argument('--ignore-function-pointers',
++                    help='skip parsing calls to function pointers',
++                    action='store_true', default=False)
++
++    args = ap.parse_args()
--    filepath = parsed.filepath[0]
--    sections_to_view = parsed.sections_to_view
--    not_object_file = parsed.not_object_file
--    readable = True
--    function_pointers = parsed.ignore_function_pointers
--
--    parsed_objs = get_parsed_objects(filepath, sections_to_view, not_object_file, readable, function_pointers)
--    if parsed_objs is False:
--        return 1
--
--    if readable:
--        for named_function in parsed_objs:
--            print(named_function.name)
--            print([f[-1] for f in named_function.what_do_i_call])  # use [-1] to get the last element, since:
--        #either we are in ('address', 'name'), when we want the last element, or else we are in (thing1, thing2, ...)
--        #so for the sake of argument we'll take the last thing
--
--if __name__ == "__main__":
++    filepath = args.filepath[0]
++    sections = args.sections_to_view
++    ignore_ptrs = args.ignore_function_pointers
++
++    # Pass by keyword: the second positional parameter of Parser is
++    # file_object, not sections.
++    parser = Parser(filepath, sections=sections, ignore_ptrs=ignore_ptrs)
++    parser.parse()
++
++
++if __name__ == '__main__':
      main()
 === modified file 'src/sextant/query.py'
 --- src/sextant/query.py	2014-08-26 16:33:20 +0000
 +++ src/sextant/query.py	2014-10-23 12:33:12 +0000
@@ -14,7 +14,7 @@
  from .export import ProgramConverter
--def query(remote_neo4j, input_query, display_neo4j='', program_name=None,
++def query(connection, display_neo4j='', program_name=None,
            argument_1=None, argument_2=None, suppress_common=False):
      """
      Run a query against the database at remote_neo4j.
@@ -36,24 +36,24 @@
      """
--    if display_neo4j:
--        display_url = display_neo4j
--    else:
--        display_url = remote_neo4j
++    # if display_neo4j:
++    #     display_url = display_neo4j
++    # else:
++    #     display_url = remote_neo4j
--    try:
--        db = db_api.SextantConnection(remote_neo4j)
--    except requests.exceptions.ConnectionError as err:
--        logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))
--        logging.error(str(err))
--        return 2
--    #Not supported in python 2
--    #except (urllib.exceptions.MaxRetryError):
--     #   logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))
--      #  return 2
--    except Exception as err:
--        logging.exception(str(err))
--        return 2
++    # try:
++    #     db = db_api.SextantConnection(remote_neo4j)
++    # except requests.exceptions.ConnectionError as err:
++    #     logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))
++    #     logging.error(str(err))
++    #     return 2
++    # #Not supported in python 2
++    # #except (urllib.exceptions.MaxRetryError):
++    #  #   logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))
++    #   #  return 2
++    # except Exception as err:
++    #     logging.exception(str(err))
++    #     return 2
      prog = None
      names_list = None
@@ -66,38 +66,38 @@
          if argument_1 is None:
              print('Supply one function name to functions-calling.')
              return 1
--        prog = db.get_all_functions_calling(program_name, argument_1)
++        prog = connection.get_all_functions_calling(program_name, argument_1)
      elif input_query == 'functions-called-by':
          if argument_1 is None:
              print('Supply one function name to functions-called-by.')
              return 1
--        prog = db.get_all_functions_called(program_name, argument_1)
++        prog = connection.get_all_functions_called(program_name, argument_1)
      elif input_query == 'all-call-paths':
          if argument_1 is None and argument_2 is None:
              print('Supply two function names to calls-between.')
              return 1
--        prog = db.get_call_paths(program_name, argument_1, argument_2)
++        prog = connection.get_call_paths(program_name, argument_1, argument_2)
      elif input_query == 'whole-program':
--        prog = db.get_whole_program(program_name)
++        prog = connection.get_whole_program(program_name)
      elif input_query == 'shortest-call-path':
          if argument_1 is None and argument_2 is None:
              print('Supply two function names to shortest-path.')
              return 1
--        prog = db.get_shortest_path_between_functions(program_name, argument_1, argument_2)
++        prog = connection.get_shortest_path_between_functions(program_name, argument_1, argument_2)
      elif input_query == 'functions':
          if program_name is not None:
--            func_names = db.get_function_names(program_name)
++            func_names = connection.get_function_names(program_name)
              if func_names:
                  names_list = list(func_names)
              else:
                  print('No functions were found in program %s on server %s.' % (program_name, display_url))
          else:
--            list_of_programs = db.get_program_names()
++            list_of_programs = connection.get_program_names()
              if not list_of_programs:
                  print('Server %s database empty.' % (display_url))
                  return 0
--            func_list = [db.get_function_names(prog_name)
++            func_list = [connection.get_function_names(prog_name)
                           for prog_name in list_of_programs]
              if not func_list:
@@ -105,7 +105,7 @@
              else:
                  names_list = func_list
      elif input_query == 'programs':
--        list_found = list(db.get_program_names())
++        list_found = list(connection.get_program_names())
          if not list_found:
              print('No programs were found on server {}.'.format(display_url))
          else:
@@ -122,7 +122,5 @@
          print('Nothing was returned from the query.')
--def audit(remote_neo4j):
--    db = db_api.SextantConnection(remote_neo4j)
--
--    return db.programs_with_metadata()
++def audit(connection):
++    return connection.programs_with_metadata()
 === added file 'src/sextant/sshmanager.py'
 --- src/sextant/sshmanager.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/sshmanager.py	2014-10-23 12:33:12 +0000
@@ -0,0 +1,278 @@
++import os
++import getpass
++import logging
++import subprocess
++
++"""Provide a class to manage an SSH tunnel and controller"""
++__all__ = ('SSHConnectionError', 'SSHCommandError', 'SSHManager')
++
++# The location of the temporary directory to create on the REMOTE machine.
++# Temporary files will be scp'd here prior to upload to the neo4j database.
++TMP_DIR = '/tmp/sextant'
++
++
++class SSHConnectionError(Exception):
++    """
++    An exception raised when an attempt to establish an ssh connection fails.
++    """
++    pass
++
++
++class SSHCommandError(Exception):
++    """
++    An exception raised when an attempt to run a command over ssh fails.
++    """
++    pass
++
++
++class SSHManager(object):
++    """
++    Manage an ssh tunnel with port forwarding.
++
++    Attributes:
++        local_port:
++            The port number on the local machine to forward.
++        remote_host:
++            The host to ssh into.
++        remote_port:
++            The port number on the remote host to connect to.
++        ssh_user:
++            The username to use for sshing - defaults to None, in which case
++            the ssh connection uses the username of the user who ran sextant.
++
++        _controller_name:
++            The base of the identifying name for the ssh controller - the
++            actual name will be a combination of this and the local port.
++        _is_localhost:
++            True if we are trying to ssh into localhost. In this case do not
++            open the tunnel, just provide the right api so the rest of Sextant
++            need not special case.
++    """
++
++    def __init__(self, local_port, remote_host, remote_port,
++                 ssh_user=None, is_localhost=False):
++        """
++        Open an SSH tunnel with multiplexing enabled.
++
++        Raises:
++            ValueError:
++                If local_port or remote_port are not positive integers
++
++        Arguments:
++            local_port:
++                The number of the local port to forward.
++            remote_host:
++                The name of the remote host to connect to.
++            remote_port:
++                The port number on the remote host to connect to.
++            ssh_user:
++                An alternative user name to use for the ssh login.
++            is_localhost:
++                True if we are trying to ssh into localhost.
++        """
++        if not (isinstance(local_port, int) and local_port > 0):
++            raise ValueError(
++                'Local port {} must be a positive integer.'.format(local_port))
++        if not (isinstance(remote_port, int) and remote_port > 0):
++            raise ValueError(
++                'Remote port {} must be a positive integer.'.format(remote_port))
++
++        self.local_port = local_port
++        self.remote_host = remote_host
++        self.remote_port = remote_port
++        self.ssh_user = ssh_user
++
++        self._tmp_dir = '{}-{}'.format(TMP_DIR, self.ssh_user or getpass.getuser())
++
++        self._controller_name = 'sextantcontroller{}'.format(local_port)
++        self._is_localhost = is_localhost
++
++        self._open()
++
++    def _open(self):
++        """
++        Helper function to open the SSH tunnel.
++
++        Raises:
++            SSHConnectionError:
++                If the ssh command failed to run.
++        """
++        if self._is_localhost:
++            return
++
++        # Build the ssh command as an argument list; options are appended below.
++        cmd = ['ssh']
++
++        if self.ssh_user:
++            # ssh -l {user} ... sets the remote login username
++            cmd.extend(['-l', self.ssh_user])
++
++        # -L localport:localhost:remoteport forwards the port.
++        port_fwd = '{}:localhost:{}'.format(self.local_port, self.remote_port)
++
++        # -M makes SSH able to accept slave connections.
++        # -S sets the location of a control socket (in this case, sextantcontroller
++        #    with a unique identifier appended, just in case we run sextant twice
++        #    simultaneously), so we know how to close the port again.
++        # -f goes into background; -N does not execute a remote command;
++        # -T says to remote host that we don't want a text shell.
++        cmd.extend(['-M', '-S', self._controller_name, '-fNT',
++                    '-L', port_fwd, self.remote_host])
++
++        logging.debug('Opening SSH tunnel with cmd: {}'.format(' '.join(cmd)))
++
++        rc = subprocess.call(cmd)
++        if rc:
++            raise SSHConnectionError('SSH setup failed with error {}'.format(rc))
++
++        logging.debug('SSH tunnel created')
++
++        self._make_tmp_dir()
++
++    def close(self):
++        """
++        Close the SSH tunnel after cleaning the temp directory.
++        """
++        if self._is_localhost:
++            return
++
++        # ssh -O sends a command to the slave specified in -S, -q for quiet.
++        cmd = ['ssh', '-S', self._controller_name,
++               '-O', 'exit', '-q', self.remote_host]
++
++        logging.debug('Shutting down SSH tunnel with cmd: `{}`'
++                      .format(' '.join(cmd)))
++
++        # SSH has a bug on some systems which causes it to ignore the -q flag
++        # meaning it prints "Exit request sent." to stderr.
++        # To avoid this, we grab stderr temporarily, and see if it's that string;
++        # if it is, suppress it.
++        pr = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
++        stdout, stderr = pr.communicate()
++        if stderr.rstrip() != 'Exit request sent.':
++            logging.error('SSH shutdown stderr: {}'.format(stderr))
++
++        if pr.returncode == 0:
++            logging.debug('Shut down successfully')
++        else:
++            logging.error('SSH shutdown failed with code {}'
++                          .format(pr.returncode))
++
++        # Clean the temporary directory we created earlier.
++        self._delete_tmp_dir()
++
++    def _call(self, *args):
++        """
++        Execute a command on the remote machine over SSH.
++
++        Return a tuple of rc, stdout, stderr from the process call.
++
++        Arguments:
++            *args:
++                Strings containing the individual words of the command to
++                execute. E.g. _call('ls', '-lh', '.').
++        """
++        if self._is_localhost:
++            return (1, None, 'Cannot call SSH command from localhost')
++
++        ssh_cmd = ['ssh', '-S', self._controller_name, self.remote_host]
++        ssh_cmd.extend(args)
++        p = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
++        stdout, stderr = p.communicate()
++
++        if p.returncode:
++            logging.debug('Call to `{}` failed with code: {}, stderr: {}'
++                          .format(' '.join(ssh_cmd), p.returncode, stderr))
++
++        return p.returncode, stdout, stderr
++
++    def _make_tmp_dir(self):
++        """
++        Create the per-user temporary directory on the remote machine.
++        """
++        self._call('mkdir', '-p', self._tmp_dir)
++
++    def _delete_tmp_dir(self):
++        """
++        Remove the temporary directory on the remote machine.
++        """
++        self._call('rm', '-r', self._tmp_dir)
++
++
++    def send_to_tmp_dir(self, path_list):
++        """
++        Send the specified files to the temporary directory on the remote machine.
++
++        Return an iterator of save paths on the remote machine.
++        Raises:
++            ValueError:
++                If no file paths were provided, or if one or more of the
++                provided paths is not an actual file.
++            SSHCommandError:
++                If the scp command failed for any reason.
++
++        Arguments:
++            path_list:
++                Iterator of paths to the files on the local machine. All files
++                will be checked before copying to ensure that they exist and
++                to prevent passing arbitrary arguments to the ssh _call
++                command.
++        """
++        if not path_list:
++            raise ValueError('attempt to copy zero files')
++
++        # If we are in localhost, we are not controlling the TMP_DIR,
++        # so the files are already there.
++        if self._is_localhost:
++            return path_list
++
++        # Make sure we can take the len of path_list and iterate over it
++        # more than once.
++        path_list = list(path_list)
++
++        # Check that actual files are being copied - not random strings.
++        to_copy = [f for f in path_list if os.path.isfile(f)]
++
++        if len(to_copy) < len(path_list):
++            missed = [f for f in path_list if not f in to_copy]
++            raise ValueError('Attempted to copy non existant files: {}'
++                             .format(', '.join(missed)))
++
++        scp_cmd = ['scp']
++        scp_cmd.extend(to_copy)
++        scp_cmd.append('{}:{}'.format(self.remote_host, self._tmp_dir))
++
++        proc = subprocess.Popen(scp_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
++        rc = proc.wait()
++        if rc:
++            raise SSHCommandError('scp failed with code {}: {}'.format(rc, stderr))
++
++        return (os.path.join(self._tmp_dir, os.path.basename(f)) for f in to_copy)
++
++    def remove_from_tmp_dir(self, path_list):
++        """
++        Delete the files specified as arguments from the remote machine.
++
++        The output of send_to_tmp_dir may be passed as input to this function.
++
++        Raises:
++            SSHCommandError:
++                If the rm command fails for any reason.
++
++        Arguments:
++            path_list:
++                Iterator of paths of the files on the remote machine, relative
++                to the temporary directory. E.g. remove_from_tmp_dir('foo')
++                will delete the file self._tmp_dir/foo
++        """
++        if self._is_localhost:
++            return
++
++        # Assume we can trust this file list.
++        paths = [os.path.join(self._tmp_dir, os.path.basename(f)) for f in path_list]
++        self._call('rm', *paths)
++
++
++
++
++
 === added file 'src/sextant/test_all.sh'
 --- src/sextant/test_all.sh	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_all.sh	2014-10-23 12:33:12 +0000
@@ -0,0 +1,4 @@
++#!/usr/bin/bash
++
++PYTHONPATH=$PYTHONPATH:~/.
++python -m unittest discover --pattern=test_*.py
 === added file 'src/sextant/test_csvwriter.py'
 --- src/sextant/test_csvwriter.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_csvwriter.py	2014-10-23 12:33:12 +0000
@@ -0,0 +1,89 @@
++#!/usr/bin/python
++import unittest
++from csvwriter import CSVWriter
++import subprocess
++from os import listdir
++
++class TestSequence(unittest.TestCase):
++    def get_writer(self, path='tmp_test', headers=['name', 'type'], split=100):
++        return CSVWriter(path, headers, split)
++
++    def tearDown(self):
++        to_rm = [f for f in listdir('.') if f.startswith('tmp_test') and f.endswith('.csv')]
++        if to_rm:
++            rc = subprocess.call(['rm'] + to_rm)
++            if rc:
++                msg = 'failed to clean'
++            else:
++                msg = 'cleaned'
++            print('{} {} files {}'.format(msg, len(to_rm), to_rm))
++
++    def test_headers(self):
++        # check that headers are being written correctly
++        headers = ['some', 'headers', 'to', 'check']
++        writer = self.get_writer(headers=headers)
++        writer.finish()
++
++        expected_path = 'tmp_test0.csv'
++        self.assertEquals(writer.file_iter().next(), expected_path)
++        writer_file = open('tmp_test0.csv', 'r')
++
++        self.assertEquals(writer_file.readline(), 'some,headers,to,check\n')
++        self.assertFalse(writer_file.readline()) # check that nothing extra is written
++
++        writer_file.close()
++
++    def test_writing(self):
++        # check that csv entries are written correctly, and errors
++        # appropriately raised for invalid input
++        writer = self.get_writer()
++
++        self.assertRaises(ValueError, writer.write, 'too short')
++        self.assertRaises(ValueError, writer.write, 'slightly', 'too', 'long')
++        writer.write('just', 'write')
++
++        writer.finish()
++
++        writer_file = open(writer.file_iter().next(), 'r+')
++
++        self.assertEqual(writer_file.readline(), 'name,type\n')
++        self.assertEqual(writer_file.readline(), 'just,write\n')
++        self.assertFalse(writer_file.readline())
++
++        writer_file.close()
++
++    def test_split(self):
++        split = 10
++        files = 10
++        writer = self.get_writer(split=split)
++
++        for i in xrange(files*(split-1)): # split-1 to account for header line
++            writer.write('an', 'entry')
++
++        writer.finish()
++
++        gen_count = sum(1 for f in writer.file_iter())
++        self.assertEqual(gen_count, files,
++                         'generated {} files, expected {}'
++                         .format(gen_count, files))
++
++        for f in writer.file_iter():
++            with open(f, 'r+') as wf:
++                header_line = wf.readline()
++                header_expected = 'name,type\n'
++                self.assertEqual(header_line, header_expected,
++                                 '{} contained header {}, expected {}'
++                                 .format(f, header_line, header_expected)) # check headers
++
++            # check line count
++            with open(f, 'r+') as wf:
++                line_count = sum(1 for line in wf)
++                self.assertEqual(line_count, split,
++                                 '{} contained {} lines, expected {}'
++                                 .format(f, line_count, split))
++
++
++if __name__ == '__main__':
++    unittest.main()
++
++
 === renamed file 'src/sextant/tests.py' => 'src/sextant/test_db_api.py' (properties changed: -x to +x)
 --- src/sextant/tests.py	2014-08-14 15:23:39 +0000
 +++ src/sextant/test_db_api.py	2014-10-23 12:33:12 +0000
@@ -1,3 +1,4 @@
++#!/usr/bin/python
  # -----------------------------------------
  # Sextant
  # Copyright 2014, Ensoft Ltd.
@@ -10,56 +11,69 @@
  from db_api import Function
  from db_api import FunctionQueryResult
  from db_api import SextantConnection
--from db_api import Validator
++from db_api import validate_query
  class TestFunctionQueryResults(unittest.TestCase):
--    def setUp(self):
++    @classmethod
++    def setUpClass(cls):
          # we need to set up the remote database by using the neo4j_input_api
--        self.remote_url = 'http://ensoft-sandbox:7474'
--
--        self.setter_connection = SextantConnection(self.remote_url)
--        self.program_1_name = 'testprogram'
--        self.upload_program = self.setter_connection.new_program(self.program_1_name)
--        self.upload_program.add_function('func1')
--        self.upload_program.add_function('func2')
--        self.upload_program.add_function('func3')
--        self.upload_program.add_function('func4')
--        self.upload_program.add_function('func5')
--        self.upload_program.add_function('func6')
--        self.upload_program.add_function('func7')
--        self.upload_program.add_function_call('func1', 'func2')
--        self.upload_program.add_function_call('func1', 'func4')
--        self.upload_program.add_function_call('func2', 'func1')
--        self.upload_program.add_function_call('func2', 'func4')
--        self.upload_program.add_function_call('func3', 'func5')
--        self.upload_program.add_function_call('func4', 'func4')
--        self.upload_program.add_function_call('func4', 'func5')
--        self.upload_program.add_function_call('func5', 'func1')
--        self.upload_program.add_function_call('func5', 'func2')
--        self.upload_program.add_function_call('func5', 'func3')
--        self.upload_program.add_function_call('func6', 'func7')
--
--        self.upload_program.commit()
--
--        self.one_node_program_name = 'testprogram1'
--        self.upload_one_node_program = self.setter_connection.new_program(self.one_node_program_name)
--        self.upload_one_node_program.add_function('lonefunc')
--
--        self.upload_one_node_program.commit()
++        cls.remote_url = 'http://ensoft-sandbox:7474'
++
++        cls.setter_connection = SextantConnection('ensoft-sandbox', 7474)
++
++        cls.program_1_name = 'testprogram'
++        cls.one_node_program_name = 'testprogram1'
++        cls.empty_program_name = 'testprogramblank'
++
++        # if anything failed before, delete programs now
++        cls.setter_connection.delete_program(cls.program_1_name)
++        cls.setter_connection.delete_program(cls.one_node_program_name)
++        cls.setter_connection.delete_program(cls.empty_program_name)
++
++
++        cls.upload_program = cls.setter_connection.new_program(cls.program_1_name)
++        cls.upload_program.add_function('func1')
++        cls.upload_program.add_function('func2')
++        cls.upload_program.add_function('func3')
++        cls.upload_program.add_function('func4')
++        cls.upload_program.add_function('func5')
++        cls.upload_program.add_function('func6')
++        cls.upload_program.add_function('func7')
++        cls.upload_program.add_call('func1', 'func2')
++        cls.upload_program.add_call('func1', 'func4')
++        cls.upload_program.add_call('func2', 'func1')
++        cls.upload_program.add_call('func2', 'func4')
++        cls.upload_program.add_call('func3', 'func5')
++        cls.upload_program.add_call('func4', 'func4')
++        cls.upload_program.add_call('func4', 'func5')
++        cls.upload_program.add_call('func5', 'func1')
++        cls.upload_program.add_call('func5', 'func2')
++        cls.upload_program.add_call('func5', 'func3')
++        cls.upload_program.add_call('func6', 'func7')
++
++        cls.upload_program.commit()
++
++        cls.upload_one_node_program = cls.setter_connection.new_program(cls.one_node_program_name)
++        cls.upload_one_node_program.add_function('lonefunc')
++
++        cls.upload_one_node_program.commit()
--        self.empty_program_name = 'testprogramblank'
--        self.upload_empty_program = self.setter_connection.new_program(self.empty_program_name)
--
--        self.upload_empty_program.commit()
--
--        self.getter_connection = SextantConnection(self.remote_url)
--
--    def tearDown(self):
--        self.setter_connection.delete_program(self.upload_program.program_name)
--        self.setter_connection.delete_program(self.upload_one_node_program.program_name)
--        self.setter_connection.delete_program(self.upload_empty_program.program_name)
--        del(self.setter_connection)
++        cls.upload_empty_program = cls.setter_connection.new_program(cls.empty_program_name)
++
++        cls.upload_empty_program.commit()
++
++        cls.getter_connection = cls.setter_connection
++
++
++    @classmethod
++    def tearDownClass(cls):
++        cls.setter_connection.delete_program(cls.upload_program.program_name)
++        cls.setter_connection.delete_program(cls.upload_one_node_program.program_name)
++        cls.setter_connection.delete_program(cls.upload_empty_program.program_name)
++
++        cls.setter_connection.close()
++        del(cls.setter_connection)
      def test_17_get_call_paths(self):
          reference1 = FunctionQueryResult(parent_db=None, program_name=self.program_1_name)
@@ -134,7 +148,7 @@
      def test_08_get_program_names(self):
          reference = {self.program_1_name, self.one_node_program_name, self.empty_program_name}
--        self.assertEqual(reference, self.getter_connection.get_program_names())
++        self.assertTrue(reference.issubset(self.getter_connection.get_program_names()))
      def test_11_get_all_functions_called(self):
@@ -249,13 +263,13 @@
          self.assertIsNone(self.getter_connection.get_call_paths(self.one_node_program_name, 'notafunc', 'notafunc'))
      def test_10_validator(self):
--        self.assertFalse(Validator.validate(''))
--        self.assertTrue(Validator.validate('thisworks'))
--        self.assertTrue(Validator.validate('th1sw0rks'))
--        self.assertTrue(Validator.validate('12345'))
--        self.assertFalse(Validator.validate('this does not work'))
--        self.assertTrue(Validator.validate('this_does_work'))
--        self.assertFalse(Validator.validate("'")) # string consisting of a single quote mark
++        self.assertFalse(validate_query(''))
++        self.assertTrue(validate_query('thisworks'))
++        self.assertTrue(validate_query('th1sw0rks'))
++        self.assertTrue(validate_query('12345'))
++        self.assertFalse(validate_query('this does not work'))
++        self.assertTrue(validate_query('this_does_work'))
++        self.assertFalse(validate_query("'")) # string consisting of a single quote mark
  if __name__ == '__main__':
--    unittest.main()
 \ No newline at end of file
++    unittest.main()
 === added file 'src/sextant/test_parser.py'
 --- src/sextant/test_parser.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_parser.py	2014-10-23 12:33:12 +0000
@@ -0,0 +1,85 @@
++#!/usr/bin/python
++from collections import defaultdict
++import unittest
++import subprocess
++
++import objdump_parser as parser
++
++DUMP_FILE = 'test_resources/parser_test.dump'
++
++class TestSequence(unittest.TestCase):
++    def setUp(self):
++        pass
++
++    def add_function(self, dct, name, typ):
++        self.assertFalse(name in dct, "duplicate function added: {} into {}".format(name, dct.keys()))
++        dct[name] = typ
++
++    def add_call(self, dct, caller, callee):
++        dct[caller].append(callee)
++
++    def do_parse(self, path=DUMP_FILE, sections=['.text'], ignore_ptrs=False):
++        functions = {}
++        calls = defaultdict(list)
++
++        # set the Parser to put output in local dictionaries
++        add_function = lambda n, t: self.add_function(functions, n, t)
++        add_call = lambda a, b: self.add_call(calls, a, b)
++
++        p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs,
++                          add_function=add_function, add_call=add_call)
++        res = p.parse()
++
++        parser.add_function = None
++        parser.add_call = None
++
++        return res, functions, calls
++
++
++    def test_open(self):
++        self.assertRaises(parser.FileNotFoundError, parser.Parser, file_path='rubbish file')
++
++    def test_functions(self):
++        # ensure that the correct functions are listed with the correct types
++        res, funcs, calls = self.do_parse()
++
++        for name, typ in zip(['normal', 'duplicates', 'wierd$name', 'printf', 'func_ptr_3'],
++                             ['normal', 'normal', 'normal', 'stub', 'pointer']):
++            self.assertTrue(name in funcs, "'{}' not found in function dictionary".format(name))
++            self.assertEquals(funcs[name], typ)
++
++        self.assertFalse('__gmon_start__' in funcs, "don't see a function defined in .plt")
++
++    def test_no_ptrs(self):
++        # ensure that the ignore_ptrs flag is working
++        res, funcs, calls = self.do_parse(ignore_ptrs=True)
++
++        self.assertFalse('pointer' in funcs.values())
++        self.assertEqual(len(calls['normal']), 2)
++
++
++    def test_calls(self):
++        res, funcs, calls = self.do_parse()
++
++        self.assertTrue('normal' in calls['main'])
++        self.assertTrue('duplicates' in calls['main'])
++
++        normal_calls = sorted(['wierd$name', 'printf', 'func_ptr_3'])
++        self.assertEquals(sorted(calls['normal']), normal_calls)
++
++        self.assertEquals(calls['duplicates'].count('normal'), 2)
++        self.assertEquals(calls['duplicates'].count('printf'), 2,
++                          "expected 2 printf calls in {}".format(calls['duplicates']))
++        self.assertTrue('func_ptr_4' in calls['duplicates'])
++        self.assertTrue('func_ptr_5' in calls['duplicates'])
++
++    def test_sections(self):
++        res, funcs, calls = self.do_parse(sections=['.plt', '.text'])
++
++        # check that we have got rid of the @s in the names
++        self.assertTrue('@' not in ''.join(funcs.keys()), "check names are extracted correctly")
++        self.assertTrue('__gmon_start__' in funcs, "see a function defined only in .plt")
++
++
++if __name__ == '__main__':
++    unittest.main()
 === added directory 'src/sextant/test_resources'
 === added file 'src/sextant/test_resources/parser_test'
 Binary files src/sextant/test_resources/parser_test	1970-01-01 00:00:00 +0000 and src/sextant/test_resources/parser_test	2014-10-23 12:33:12 +0000 differ
 === added file 'src/sextant/test_resources/parser_test.c'
 --- src/sextant/test_resources/parser_test.c	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_resources/parser_test.c	2014-10-23 12:33:12 +0000
@@ -0,0 +1,57 @@
++// COMMENT
++#include<stdio.h>
++
++static int
++normal(int a);
++
++static int
++wierd$name(int a);
++
++typedef int (*pointer)(int);
++
++static int
++normal(int a)
++{
++    /* call a normal func,
++     * a stub and a pointer
++     */
++    pointer ptr = wierd$name;
++
++    wierd$name(a);
++    printf("%d\n", a);
++    ptr(a);
++
++    return (a);
++}
++
++static int
++wierd$name(int a)
++{
++    return (a);
++}
++
++static int
++duplicates(int a)
++{
++    pointer ptr1 = wierd$name;
++
++    /* check stubs don't get duplicated */
++    printf("first %d\n", a);
++    printf("second %d\n", a);
++
++    normal(a);
++    normal(a);
++
++    ptr1(a);
++    ptr1(a);
++
++    return (a);
++}
++
++int
++main(void)
++{
++    normal(1);
++    duplicates(1);
++    return (0);
++}
 === added file 'src/sextant/test_resources/parser_test.dump'
 --- src/sextant/test_resources/parser_test.dump	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_resources/parser_test.dump	2014-10-23 12:33:12 +0000
@@ -0,0 +1,44 @@
++Disassembly of section .init:
++080482b4 <_init>:
++ 80482b8:	call   8048350 <__x86.get_pc_thunk.bx>
++ 80482cd:	call   8048300 <__gmon_start__@plt>
++Disassembly of section .plt:
++080482e0 <printf@plt-0x10>:
++080482f0 <printf@plt>:
++08048300 <__gmon_start__@plt>:
++08048310 <__libc_start_main@plt>:
++Disassembly of section .text:
++08048320 <_start>:
++ 804833c:	call   8048310 <__libc_start_main@plt>
++08048350 <__x86.get_pc_thunk.bx>:
++08048360 <deregister_tm_clones>:
++ 8048386:	call   *%eax
++08048390 <register_tm_clones>:
++ 80483c3:	call   *%edx
++080483d0 <__do_global_dtors_aux>:
++ 80483df:	call   8048360 <deregister_tm_clones>
++080483f0 <frame_dummy>:
++ 804840f:	call   *%eax
++0804841d <normal>:
++ 8048430:	call   8048458 <wierd$name>
++ 8048443:	call   80482f0 <printf@plt>
++ 8048451:	call   *%eax
++08048458 <wierd$name>:
++08048460 <duplicates>:
++ 804847b:	call   80482f0 <printf@plt>
++ 804848e:	call   80482f0 <printf@plt>
++ 8048499:	call   804841d <normal>
++ 80484a4:	call   804841d <normal>
++ 80484b2:	call   *%eax
++ 80484bd:	call   *%eax
++080484c4 <main>:
++ 80484d4:	call   804841d <normal>
++ 80484e0:	call   8048460 <duplicates>
++080484f0 <__libc_csu_init>:
++ 80484f6:	call   8048350 <__x86.get_pc_thunk.bx>
++ 804850e:	call   80482b4 <_init>
++ 804853b:	call   *-0xf8(%ebx,%edi,4)
++08048560 <__libc_csu_fini>:
++Disassembly of section .fini:
++08048564 <_fini>:
++ 8048568:	call   8048350 <__x86.get_pc_thunk.bx>
 === added file 'src/sextant/test_sshmanager.py'
 --- src/sextant/test_sshmanager.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_sshmanager.py	2014-10-23 12:33:12 +0000
@@ -0,0 +1,72 @@
++#!/usr/bin/python3
++import unittest
++import sshmanager
++import sshmanager
++import os
++sshmanager.TMP_DIR = '/home/benhutc/obj/csvload/src/sextant/test_resources/tmp'
++
++
++class TestSequence(unittest.TestCase):
++    def setUp(self):
++        self.manager = None
++
++    def tearDown(self):
++        if self.manager:
++            self.manager.close()
++            self.manager = None
++
++    def get_manager(self, local_port=9643, remote_host='localhost',
++                    remote_port=9643, ssh_user=None):
++        return sshmanager.SSHManager(local_port, remote_host, remote_port, ssh_user)
++
++    def test_init(self):
++        self.assertRaises(ValueError, self.get_manager, local_port='invalid port')
++        self.assertRaises(ValueError, self.get_manager, remote_port='invalid port')
++
++    def test_connect(self):
++        # make a connection to localhost and ensure that tmp is created
++        self.manager = self.get_manager()
++        self.assertTrue(os.path.isdir(self.manager._tmp_dir))
++        self.manager.close()
++        self.assertFalse(os.path.isdir(self.manager._tmp_dir))
++        self.manager = None
++
++        # check connection failure
++        self.assertRaises(sshmanager.SSHConnectionError, self.get_manager, remote_host='invalid host')
++
++    def test_files(self):
++        genuine_file = 'test_resources/parser_test.c'
++        genuine_file2 = 'test_resources/parser_test'
++        absent_file = 'absent_file'
++
++        self.manager = self.get_manager()
++        # check sending no files fails
++        self.assertRaises(ValueError, self.manager.send_to_tmp_dir, [])
++        # and sending a non-existent file
++        self.assertRaises(ValueError, self.manager.send_to_tmp_dir, [absent_file, genuine_file])
++
++        self.manager.send_to_tmp_dir([genuine_file, genuine_file2])
++        self.assertTrue(os.path.isfile(os.path.join(self.manager._tmp_dir, genuine_file.split('/')[-1])))
++        self.assertTrue(os.path.isfile(os.path.join(self.manager._tmp_dir, genuine_file2.split('/')[-1])))
++
++        self.manager.remove_from_tmp_dir([genuine_file, genuine_file2])
++        self.assertFalse(os.path.isfile(os.path.join(self.manager._tmp_dir,
++                                               genuine_file.split('/')[-1])))
++        self.assertFalse(os.path.isfile(os.path.join(self.manager._tmp_dir,
++                                               genuine_file2.split('/')[-1])))
++
++
++        self.manager.close()
++        self.manager = None
++
++
++if __name__ == '__main__':
++    # no coverage for:
++    #  specifying ssh user
++    #  scp failure
++    #  an error in closing the ssh connection
++    #  another error in closing the ssh connection
++    #  mkdir failure
++    #  rmdir failure
++    unittest.main()
++
 === modified file 'src/sextant/update_db.py'
 --- src/sextant/update_db.py	2014-09-29 14:01:39 +0000
 +++ src/sextant/update_db.py	2014-10-23 12:33:12 +0000
@@ -5,72 +5,106 @@
  # -----------------------------------------
  # Given a program file to upload, or a program name to delete from the server, does the right thing.
++from __future__ import print_function
++
  __all__ = ("upload_program", "delete_program")
--from .db_api import SextantConnection, Validator
--from .objdump_parser import get_parsed_objects
++from .db_api import SextantConnection
++from .sshmanager import SSHConnectionError
++from .objdump_parser import Parser, run_objdump
  from os import path
++from time import time
++import subprocess
++import sys
  import logging
--
--def upload_program(user_name, file_path, db_url, display_url='',
--                   alternative_name=None, not_object_file=False):
--    """
--    Uploads a program to the remote database.
--
--    Raises requests.exceptions.ConnectionError if the server didn't exist.
--    Raises IOError if file_path doesn't correspond to a file.
--    Raises ValueError if the desired alternative_name (or the default, if no
--    alternative_name was specified) already exists in the database.
--    :param file_path: the path to the local file we wish to upload
--    :param db_url: the URL of the database (eg. http://localhost:7474)
--    :param display_url: alternative URL to display instead of db_url
--    :param alternative_name: a name to give the program to override the default
--    :param object_file: bool(the file is an objdump text output file, rather than a compiled binary)
--
--    """
--
--    if not display_url:
--        display_url = db_url
--
--    # if no name is specified, use the form "<username>-<binary name>"
--    name = alternative_name or (user_name + '-' + path.split(file_path)[-1])
--
--    connection = SextantConnection(db_url)
--
--    program_names = connection.get_program_names()
--    if Validator.sanitise(name) in program_names:
--        raise ValueError("There is already a program with name {}; "
--                         "please delete the previous one with the same name "
--                         "and retry, or rename the input file.".format(name))
--
--    parsed_objects = get_parsed_objects(filepath=file_path,
--                                        sections_to_view=['.text'],
--                                        not_object_file=not_object_file,
--                                        ignore_function_pointers=False)
--
--    logging.info('Objdump has parsed!')
--
--    program_representation = connection.new_program(Validator.sanitise(name))
--
--    for obj in parsed_objects:
--        for called in obj.what_do_i_call:
--            if not program_representation.add_function_call(obj.name, called[-1]): # called is a tuple (address, name)
--                logging.error('Validation error: {} calling {}'.format(obj.name, called[-1]))
--
--    logging.info('Sending {} named objects to server {}...'.format(len(parsed_objects), display_url))
--    program_representation.commit()
--    logging.info('Successfully added {}.'.format(name))
--
--
--def delete_program(program_name, db_url):
--    """
--    Deletes a program with the specified name from the database.
--    :param program_name: the name of the program to delete
--    :param db_url: the URL of the database (eg. http://localhost:7474)
--    :return: bool(success)
--    """
--    connection = SextantConnection(db_url)
++def upload_program(connection, user_name, file_path, program_name=None,
++                   not_object_file=False):
++    """
++    Upload a program's functions and call graph to the database.
++
++    Arguments:
++        connection:
++            The SextantConnection object that manages the database connection.
++        user_name:
++            The user name of the user uploading the program.
++        file_path:
++            The path to either: the output of objdump (if not_object_file is
++            True) OR to a binary file (if not_object_file is False).
++        program_name:
++            An optional name to give the program in the database, if not
++            specified then <user_name>-<file name> will be used.
++        not_object_file:
++            Flag controlling whether file_path is pointing to a dump file or
++            a binary file.
++    """
++    if not connection._ssh:
++        raise SSHConnectionError('An SSH connection is required for '
++                                 'program upload.')
++
++    if not program_name:
++        file_no_ext = path.basename(file_path).split('.')[0]
++        program_name = '{}-{}'.format(user_name, file_no_ext)
++
++
++    if program_name in connection.get_program_names():
++        raise ValueError('A program with name `{}` already exists in the database'
++                         .format(program_name))
++
++
++    print('Uploading `{}` to the database. '
++          'This may take some time for larger programs.'
++          .format(program_name))
++    start = time()
++
++    if not not_object_file:
++        print('Generating dump file...', end='')
++        sys.stdout.flush()
++        file_path, file_object = run_objdump(file_path)
++        print('done.')
++    else:
++        file_object = None
++
++    # Make parser and wire to DBprogram.
++    with connection.new_program(program_name) as program:
++
++        def start_parser(program):
++            print('Parsing dump file...', end='')
++            sys.stdout.flush()
++
++        def finish_parser(parser, program):
++            # Callback to make sure the program's csv files are flushed when
++            # the parser completes.
++            program.func_writer.finish()
++            program.call_writer.finish()
++
++            print('done: {} functions and {} calls.'
++                  .format(parser.function_count, parser.call_count))
++
++        parser = Parser(file_path = file_path, file_object = file_object,
++                        sections=[],
++                        add_function = program.add_function,
++                        add_call = program.add_call,
++                        started=lambda parser: start_parser(program),
++                        finished=lambda parser: finish_parser(parser, program))
++        parser.parse()
++
++        program.commit()
++
++    end = time()
++    print('Finished in {:.2f}s.'.format(end-start))
++
++
++def delete_program(connection, program_name):
++    """
++    Remove the specified program from the database.
++
++    Arguments:
++        connection:
++            The SextantConnection object managing the database connection.
++        program_name:
++            The name of the program to remove from the database.
++    """
      connection.delete_program(program_name)
--    print('Successfully deleted {}.'.format(program_name))
++
 === modified file 'src/sextant/web/server.py'
 --- src/sextant/web/server.py	2014-10-03 11:47:52 +0000
 +++ src/sextant/web/server.py	2014-10-23 12:33:12 +0000
@@ -26,7 +26,8 @@
  from cgi import escape  # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 2
--database_url = None  # the URL to access the database instance
++# global SextantConnection object which deals with the port forwarding
++CONNECTION = None
  RESPONSE_CODE_OK = 200
  RESPONSE_CODE_BAD_REQUEST = 400
@@ -67,25 +68,6 @@
  class SVGRenderer(Resource):
--    def error_creating_neo4j_connection(self, failure):
--        self.write("Error creating Neo4J connection: %s\n") % failure.getErrorMessage()
--
--    @staticmethod
--    def create_neo4j_connection():
--        return db_api.SextantConnection(database_url)
--
--    @staticmethod
--    def check_program_exists(connection, name):
--        return connection.check_program_exists(name)
--
--    @staticmethod
--    def get_whole_program(connection, name):
--        return connection.get_whole_program(name)
--
--    @staticmethod
--    def get_functions_calling(connection, progname, funcname):
--        return connection.get_all_functions_calling(progname, funcname)
--
      @staticmethod
      def get_plot(program, suppress_common_functions=False, remove_self_calls=False):
          graph_dot = export.ProgramConverter.to_dot(program, suppress_common_functions,
@@ -111,7 +93,7 @@
          res_msg = None # set this in the logic
+         #
--        # Get program name and database connection, check if program exists
++        # Check if provided program name exists
+         #
          name = args.get('program_name', [None])[0]
@@ -121,16 +103,7 @@
              res_msg = "Supply 'program_name' parameter."
          if res_code is RESPONSE_CODE_OK:
--            try:
--                conn = yield deferToThread(self.create_neo4j_connection)
--            except requests.exceptions.ConnectionError:
--                res_code = RESPONSE_CODE_BAD_GATEWAY
--                res_fmt = "Could not reach Neo4j server at {}"
--                res_msg = res_fmt.format(database_url)
--                conn = None
--
--        if res_code is RESPONSE_CODE_OK:
--            exists = yield deferToThread(self.check_program_exists, conn, name)
++            exists = yield deferToThread(CONNECTION.check_program_exists, name)
              if not exists:
                  res_code = RESPONSE_CODE_NOT_FOUND
                  res_fmt = "Program {} not found in database."
@@ -146,28 +119,23 @@
          # look for in request.args, both tuples
          queries = {
              'whole_program': (
--                self.get_whole_program,
--                (conn, name),
++                CONNECTION.get_whole_program,
                  ()
              ),
              'functions_calling': (
--                self.get_functions_calling,
--                (conn, name),
++                CONNECTION.get_all_functions_calling,
                  ('func1',)
              ),
              'functions_called_by': (
--                conn.get_all_functions_called,
--                (name,),
++                CONNECTION.get_all_functions_called,
                  ('func1',)
              ),
              'all_call_paths': (
--                conn.get_call_paths,
--                (name,),
++                CONNECTION.get_call_paths,
                  ('func1', 'func2')
              ),
              'shortest_call_path': (
--                conn.get_shortest_path_between_functions,
--                (name,),
++                CONNECTION.get_shortest_path_between_functions,
                  ('func1', 'func2')
+             )
+         }
@@ -186,7 +154,7 @@
          # extract any required keyword arguments from request.args
          if res_code is RESPONSE_CODE_OK:
--            fn, known_args, kwargs = query
++            fn, kwargs = query
              # all args will be strings - use None to indicate missing argument
              req_args = tuple(args.get(kwarg, [None])[0] for kwarg in kwargs)
@@ -202,9 +170,8 @@
          # if we are okay here we have a valid query with all required arguments
          if res_code is RESPONSE_CODE_OK:
              try:
--                all_args = known_args + req_args
                  program = yield defer_to_thread_with_timeout(render_timeout, fn,
--                                                             *all_args)
++                                                             name, *req_args)
              except defer.CancelledError:
                  # the timeout has fired and cancelled the request
                  res_code = RESPONSE_CODE_BAD_REQUEST
@@ -247,16 +214,12 @@
  class GraphProperties(Resource):
      @staticmethod
--    def _get_connection():
--        return db_api.SextantConnection(database_url)
--
--    @staticmethod
--    def _get_program_names(connection):
--        return connection.get_program_names()
--
--    @staticmethod
--    def _get_function_names(connection, program_name):
--        return connection.get_function_names(program_name)
++    def _get_program_names():
++        return CONNECTION.get_program_names()
++
++    @staticmethod
++    def _get_function_names(program_name):
++        return CONNECTION.get_function_names(program_name)
      @defer.inlineCallbacks
      def _render_GET(self, request):
@@ -269,18 +232,9 @@
          query = request.args['query'][0]
--        try:
--            neo4j_connection = yield deferToThread(self._get_connection)
--        except Exception:
--            request.setResponseCode(502)  # Bad Gateway
--            request.write("Could not reach Neo4j server at {}.".format(database_url))
--            request.finish()
--            defer.returnValue(None)
--            neo4j_connection = None  # just to silence the "referenced before assignment" warnings
--
          if query == 'programs':
              request.setHeader("content-type", "application/json")
--            prognames = yield deferToThread(self._get_program_names, neo4j_connection)
++            prognames = yield deferToThread(self._get_program_names)
              request.write(json.dumps(list(prognames)))
              request.finish()
              defer.returnValue(None)
@@ -294,7 +248,7 @@
                  defer.returnValue(None)
              program_name = request.args['program_name'][0]
--            funcnames = yield deferToThread(self._get_function_names, neo4j_connection, program_name)
++            funcnames = yield deferToThread(self._get_function_names, program_name)
              if funcnames is None:
                  request.setResponseCode(404)
                  request.setHeader("content-type", "text/plain")
@@ -319,10 +273,12 @@
          return NOT_DONE_YET
--def serve_site(input_database_url='http://localhost:7474', port=2905):
--
--    global database_url
--    database_url = input_database_url
++def serve_site(connection, port):
++    global CONNECTION
++
++    CONNECTION = connection
++
++
      # serve static directory at root
      root = File(os.path.join(environment.RESOURCES_DIR, 'sextant', 'web'))