Merge lp:~ben-hutchings/ensoft-sextant/csv-upload into lp:ensoft-sextant

Proposed by Ben Hutchings
Status: Merged
Approved by: Robert
Approved revision: 62
Merged at revision: 30
Proposed branch: lp:~ben-hutchings/ensoft-sextant/csv-upload
Merge into: lp:ensoft-sextant
Diff against target: 3573 lines (+1945/-1015)
16 files modified
src/sextant/__main__.py (+54/-232)
src/sextant/csvwriter.py (+152/-0)
src/sextant/db_api.py (+587/-302)
src/sextant/export.py (+3/-3)
src/sextant/objdump_parser.py (+302/-262)
src/sextant/query.py (+29/-31)
src/sextant/sshmanager.py (+278/-0)
src/sextant/test_all.sh (+4/-0)
src/sextant/test_csvwriter.py (+89/-0)
src/sextant/test_db_api.py (+68/-54)
src/sextant/test_parser.py (+85/-0)
src/sextant/test_resources/parser_test.c (+57/-0)
src/sextant/test_resources/parser_test.dump (+44/-0)
src/sextant/test_sshmanager.py (+72/-0)
src/sextant/update_db.py (+96/-62)
src/sextant/web/server.py (+25/-69)
To merge this branch: bzr merge lp:~ben-hutchings/ensoft-sextant/csv-upload
Reviewer Review Type Date Requested Status
Robert Approve
Review via email: mp+239356@code.launchpad.net

Commit message

Programs are now uploaded in two steps: they are first parsed into CSV files, and those files are then loaded into the database. This is _significantly_ faster for large programs.

Furthermore, the structure of the program nodes in the database has changed: previously they were unlabelled nodes with type 'program', whereas they now carry the 'program' label. Because the database partitions on label, this labelling keeps program nodes distinct from function nodes. All queries in sextant have been updated to reflect this.

New module sshmanager handles the ssh connection to the database server.
New module csvwriter deals with the nuts and bolts of the csv files.

Description of the change

Programs are now uploaded in two steps: they are first parsed into CSV files, and those files are then loaded into the database. This is _significantly_ faster for large programs.

Furthermore, the structure of the program nodes in the database has changed: previously they were unlabelled nodes with type 'program', whereas they now carry the 'program' label. Because the database partitions on label, this labelling keeps program nodes distinct from function nodes. All queries in sextant have been updated to reflect this.

New module sshmanager handles the ssh connection to the database server.
New module csvwriter deals with the nuts and bolts of the csv files.

To post a comment you must log in.
Revision history for this message
Robert (rjwills) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/sextant/__main__.py'
--- src/sextant/__main__.py 2014-10-03 13:00:52 +0000
+++ src/sextant/__main__.py 2014-10-23 12:33:12 +0000
@@ -9,7 +9,6 @@
99
10import io10import io
11import sys11import sys
12import random
13import socket12import socket
14import logging13import logging
15import logging.config14import logging.config
@@ -28,10 +27,12 @@
28from . import db_api27from . import db_api
29from . import update_db28from . import update_db
30from . import environment29from . import environment
30from . import sshmanager
3131
32config = environment.load_config()32config = environment.load_config()
3333
3434
35
35def _displayable_url(args):36def _displayable_url(args):
36 """37 """
37 Return the URL specified by the user for Sextant to look at.38 Return the URL specified by the user for Sextant to look at.
@@ -56,7 +57,7 @@
5657
57# Beginning of functions which handle the actual invocation of Sextant58# Beginning of functions which handle the actual invocation of Sextant
5859
59def _start_web(args):60def _start_web(connection, args):
60 # Don't import at top level - makes twisted dependency semi-optional,61 # Don't import at top level - makes twisted dependency semi-optional,
61 # allowing non-web functionality to work with Python 3.62 # allowing non-web functionality to work with Python 3.
62 if sys.version_info[0] == 2:63 if sys.version_info[0] == 2:
@@ -68,12 +69,12 @@
68 logging.info("Serving site on port {}".format(args.port))69 logging.info("Serving site on port {}".format(args.port))
6970
70 # server is .web.server, imported a couple of lines ago71 # server is .web.server, imported a couple of lines ago
71 server.serve_site(input_database_url=args.remote_neo4j, port=args.port)72 server.serve_site(connection, args.port)
7273
7374
74def _audit(args):75def _audit(connection, args):
75 try:76 try:
76 audited = query.audit(args.remote_neo4j)77 audited = query.audit(connection)
77 except requests.exceptions.ConnectionError as e:78 except requests.exceptions.ConnectionError as e:
78 msg = 'Connection error to server {url}: {exception}'79 msg = 'Connection error to server {url}: {exception}'
79 logging.error(msg.format(url=_displayable_url(args), exception=e))80 logging.error(msg.format(url=_displayable_url(args), exception=e))
@@ -87,8 +88,8 @@
87 titles = ("Name", "#Func", "Uploader", "User-ID", "Upload Date")88 titles = ("Name", "#Func", "Uploader", "User-ID", "Upload Date")
88 colminlens = (len(entry) for entry in titles)89 colminlens = (len(entry) for entry in titles)
89 # maximum lengths to avoid one entry from throwing the whole table90 # maximum lengths to avoid one entry from throwing the whole table
90 # date format is <YYYY:MM:DD HH:MM:SS.UUUUUU> = 26 characters91 # date format is <YYYY-MM-DD HH:MM:SS> = 19 characters
91 COLMAXLENS = (25, 5, 25, 10, 26)92 COLMAXLENS = (25, 6, 25, 10, 19)
9293
93 # make a table of the strings of each data entry we will display94 # make a table of the strings of each data entry we will display
94 text = [map(str, (p.program_name, p.number_of_funcs,95 text = [map(str, (p.program_name, p.number_of_funcs,
@@ -120,7 +121,7 @@
120 print('\n'.join(st.format(*pentry) for pentry in text))121 print('\n'.join(st.format(*pentry) for pentry in text))
121122
122123
123def _add_program(args):124def _add_program(connection, args):
124 try:125 try:
125 alternative_name = args.name_in_db[0]126 alternative_name = args.name_in_db[0]
126 except TypeError:127 except TypeError:
@@ -131,12 +132,11 @@
131 # unsupplied132 # unsupplied
132133
133 try:134 try:
134 update_db.upload_program(user_name=getpass.getuser(),135 update_db.upload_program(connection,
135 file_path=args.input_file,136 getpass.getuser(),
136 db_url=args.remote_neo4j,137 args.input_file,
137 alternative_name=alternative_name,138 alternative_name,
138 not_object_file=not_object_file,139 not_object_file)
139 display_url=_displayable_url(args))
140 except requests.exceptions.ConnectionError as e:140 except requests.exceptions.ConnectionError as e:
141 msg = 'Connection error to server {}: {}'141 msg = 'Connection error to server {}: {}'
142 logging.error(msg.format(_displayable_url(args), e))142 logging.error(msg.format(_displayable_url(args), e))
@@ -147,41 +147,41 @@
147 logging.error('Input file {} was not found.'.format(args.input_file[0]))147 logging.error('Input file {} was not found.'.format(args.input_file[0]))
148 logging.error(e)148 logging.error(e)
149 logging.debug(e, exc_info=True)149 logging.debug(e, exc_info=True)
150 except ValueError as e:150 except (ValueError, sshmanager.SSHConnectionError) as e:
151 logging.error(e)151 logging.error(e)
152152
153153
154def _delete_program(namespace):154def _delete_program(connection, args):
155 update_db.delete_program(namespace.program_name,155 update_db.delete_program(connection, args.program_name)
156 namespace.remote_neo4j)156
157157
158158def _make_query(connection, args):
159def _make_query(namespace):
160 arg1 = None159 arg1 = None
161 arg2 = None160 arg2 = None
162 try:161 try:
163 arg1 = namespace.funcs[0]162 arg1 = args.funcs[0]
164 arg2 = namespace.funcs[1]163 arg2 = args.funcs[1]
165 except TypeError:164 except TypeError:
166 pass165 pass
167 except IndexError:166 except IndexError:
168 pass167 pass
169168
170 try:169 try:
171 program_name = namespace.program[0]170 program_name = args.program[0]
172 except TypeError:171 except TypeError:
173 program_name = None172 program_name = None
174173
175 try:174 try:
176 suppress_common = namespace.suppress_common[0]175 suppress_common = args.suppress_common[0]
177 except TypeError:176 except TypeError:
178 suppress_common = False177 suppress_common = False
179178
180 query.query(remote_neo4j=namespace.remote_neo4j,179 query.query(remote_neo4j=args.remote_neo4j,
181 display_neo4j=_displayable_url(namespace),180 display_neo4j=_displayable_url(args),
182 input_query=namespace.query,181 input_query=args.query,
183 program_name=program_name,182 program_name=program_name,
184 argument_1=arg1, argument_2=arg2,183 argument_1=arg1,
184 argument_2=arg2,
185 suppress_common=suppress_common)185 suppress_common=suppress_common)
186186
187# End of functions which invoke Sextant187# End of functions which invoke Sextant
@@ -197,8 +197,10 @@
197197
198 """198 """
199199
200 argumentparser = argparse.ArgumentParser(prog='sextant', usage='sextant', description="Invoke part of the SEXTANT program")200 ap = argparse.ArgumentParser(prog='sextant',
201 subparsers = argumentparser.add_subparsers(title="subcommands")201 usage='sextant',
202 description="Invoke part of the SEXTANT program")
203 subparsers = ap.add_subparsers(title="subcommands")
202204
203 #set what will be defined as a "common function"205 #set what will be defined as a "common function"
204 db_api.set_common_cutoff(config.common_cutoff)206 db_api.set_common_cutoff(config.common_cutoff)
@@ -257,10 +259,9 @@
257 parsers[key].add_argument('--remote-neo4j', metavar="URL",259 parsers[key].add_argument('--remote-neo4j', metavar="URL",
258 help="URL of neo4j server", type=str,260 help="URL of neo4j server", type=str,
259 default=config.remote_neo4j)261 default=config.remote_neo4j)
260 parsers[key].add_argument('--use-ssh-tunnel', metavar="BOOL", type=str,262 parsers[key].add_argument('--no-ssh-tunnel',
261 help="whether to SSH into the remote server,"263 help='Disable ssh tunnelling. Prevents program upload.',
262 "True/False",264 action='store_true')
263 default=str(config.use_ssh_tunnel))
264 parsers[key].add_argument('--ssh-user', metavar="NAME", type=str,265 parsers[key].add_argument('--ssh-user', metavar="NAME", type=str,
265 help="username to use as remote SSH name",266 help="username to use as remote SSH name",
266 default=str(config.ssh_user))267 default=str(config.ssh_user))
@@ -273,207 +274,28 @@
273274
274 # parse the arguments275 # parse the arguments
275276
276 return argumentparser.parse_args()277 return ap.parse_args()
277
278
279def _start_tunnel(local_port, remote_host, remote_port, ssh_user=''):
280 """
281 Creates an SSH port-forward.
282
283 This will result in localhost:local_port appearing to be
284 remote_host:remote_port.
285
286 :param local_port: integer port number to open at localhost
287 :param remote_host: string address of remote host (no port number)
288 :param remote_port: port to 'open' on the remote host
289 :param ssh_user: user to log in on the remote_host as
290
291 """
292
293 if not (isinstance(local_port, int) and local_port > 0):
294 raise ValueError(
295 'Local port {} must be a positive integer.'.format(local_port))
296 if not (isinstance(remote_port, int) and remote_port > 0):
297 raise ValueError(
298 'Remote port {} must be a positive integer.'.format(remote_port))
299
300 logging.debug('Starting SSH tunnel...')
301
302 # this cmd string will be .format()ed in a few lines' time
303 cmd = ['ssh']
304
305 if ssh_user:
306 # ssh -l {user} ... sets the remote login username
307 cmd += ['-l', ssh_user]
308
309 # -L localport:localhost:remoteport forwards the port
310 # -M makes SSH able to accept slave connections
311 # -S sets the location of a control socket (in this case, sextant-controller
312 # with a unique identifier appended, just in case we run sextant twice
313 # simultaneously), so we know how to close the port again
314 # -f goes into background; -N does not execute a remote command;
315 # -T says to remote host that we don't want a text shell.
316 cmd += ['-M',
317 '-S', 'sextantcontroller{tunnel_id}'.format(tunnel_id=local_port),
318 '-fNT',
319 '-L', '{0}:localhost:{1}'.format(local_port, remote_port),
320 remote_host]
321
322 logging.debug('Running {}'.format(' '.join(cmd)))
323
324 exit_code = subprocess.call(cmd)
325 if exit_code:
326 raise OSError('SSH setup failed with error {}'.format(exit_code))
327
328 logging.debug('SSH tunnel created.')
329
330
331def _stop_tunnel(local_port, remote_host):
332 """
333 Tear down an SSH port-forward which was previously set up with start_tunnel.
334
335 We use local_port as an identifier.
336 :param local_port: the port on localhost we are using as the entrypoint
337 :param remote_host: remote host we tunnelled into
338
339 """
340
341 logging.debug('Shutting down SSH tunnel...')
342
343 # ssh -O sends a command to the slave specified in -S
344 cmd = ['ssh',
345 '-S', 'sextantcontroller{}'.format(local_port),
346 '-O', 'exit',
347 '-q', # for quiet
348 remote_host]
349
350 # SSH has a bug on some systems which causes it to ignore the -q flag
351 # meaning it prints "Exit request sent." to stderr.
352 # To avoid this, we grab stderr temporarily, and see if it's that string;
353 # if it is, suppress it.
354 pr = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
355 stdout, stderr = pr.communicate()
356 if stderr.rstrip() != 'Exit request sent.':
357 print(stderr, file=sys.stderr)
358 if pr.returncode == 0:
359 logging.debug('Shut down successfully.')
360 else:
361 logging.warning(
362 'SSH tunnel shutdown returned error code {}'.format(pr.returncode))
363 logging.warning(stderr)
364
365
366def _is_port_used(port):
367 """
368 Checks with the OS to see whether a port is open.
369
370 Beware: port is passed directly to the shell. Make sure it is an integer.
371 We raise ValueError if it is not.
372 :param port: integer port to check for openness
373 :return: bool(port is in use)
374
375 """
376
377 # we follow http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
378 if not (isinstance(port, int) and port > 0):
379 raise ValueError('port {} must be a positive integer.'.format(port))
380
381 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
382 try:
383 sock.bind(('127.0.0.1', port))
384 except socket.error as e:
385 if e.errno == 98: # Address already in use
386 return True
387 raise
388
389 sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
390
391 return False # that is, the port is not used
392
393
394def _get_unused_port():
395 """
396 Returns a port number between 10000 and 50000 which is not currently open.
397
398 """
399
400 keep_going = True
401 while keep_going:
402 portnum = random.randint(10000, 50000)
403 keep_going = _is_port_used(portnum)
404 return portnum
405
406278
407def _get_host_and_port(url):279def _get_host_and_port(url):
408 """Given a URL as http://host:port, returns (host, port)."""280 """Given a URL as http://host:port, returns (host, port)."""
409 parsed = parse.urlparse(url)281 parsed = parse.urlparse(url)
410 return (parsed.hostname, parsed.port)282 return (parsed.hostname, parsed.port)
411
412
413def _is_localhost(host, port):
414 """
415 Checks whether a host is an alias to localhost.
416
417 Raises socket.gaierror if the host was not found.
418
419 """
420
421 addr = socket.getaddrinfo(host, port)[0][4][0]
422
423 return addr in ('127.0.0.1', '::1')
424283
425284
426def main():285def main():
427 args = parse_arguments()286 args = parse_arguments()
428287 remotehost, remoteport = _get_host_and_port(args.remote_neo4j)
429 if args.use_ssh_tunnel.lower() == 'true':288 no_ssh_tunnel = args.no_ssh_tunnel
430 localport = _get_unused_port()289 connection = None
431290
432 remotehost, remoteport = _get_host_and_port(args.remote_neo4j)291 try:
433292 conn_args = (remotehost, remoteport, no_ssh_tunnel)
434 try:293 with db_api.SextantConnection(*conn_args) as connection:
435 is_loc = _is_localhost(remotehost, remoteport)294 args.func(connection, args)
436 except socket.gaierror:295 except sshmanager.SSHConnectionError as e:
437 logging.error('Server {} not found.'.format(remotehost))296 print(e.message)
438 return297
439298
440 if is_loc:
441 # we are attempting to connect to localhost anyway, so we won't
442 # bother to SSH to it.
443 # There may be some ways the user can trick us into trying to SSH
444 # to localhost anyway, but this will do as a first pass.
445 # SSHing to localhost is undesirable because on my test computer,
446 # we get 'connection refused' if we try.
447 args.func(args)
448
449 else: # we need to SSH
450 try:
451 _start_tunnel(localport, remotehost, remoteport,
452 ssh_user=args.ssh_user)
453 except OSError as e:
454 logging.error(str(e))
455 return
456 except KeyboardInterrupt:
457 logging.info('Halting because of user interrupt.')
458 return
459
460 try:
461 args.display_neo4j = args.remote_neo4j
462 args.remote_neo4j = 'http://localhost:{}'.format(localport)
463 args.func(args)
464 except KeyboardInterrupt:
465 # this probably happened because we were running Sextant Web
466 # and Ctrl-C'ed out of it
467 logging.info('Keyboard interrupt detected. Halting.')
468 pass
469
470 finally:
471 _stop_tunnel(localport, remotehost)
472
473 else: # no need to set up the ssh, just run sextant
474 args.func(args)
475
476
477if __name__ == '__main__':299if __name__ == '__main__':
478 main()300 main()
479301
480302
=== added file 'src/sextant/csvwriter.py'
--- src/sextant/csvwriter.py 1970-01-01 00:00:00 +0000
+++ src/sextant/csvwriter.py 2014-10-23 12:33:12 +0000
@@ -0,0 +1,152 @@
1import logging
2
3"""
4Provide a class for writing to row-limited csv files.
5"""
6__all__ = ('CSVWriter',)
7
8
9class CSVWriter(object):
10 """
11 Write to csv files, automatically opening new ones at row maximum.
12
13 Provides a write(*args) method which will add a row to the currently open
14 csv file (internally managed) if there is room in it, otherwise close it,
15 silently open a new one and write to that.
16
17 Attributes:
18 base_path:
19 The base path of the output files - which will have a full path
20 of form "<base_path><number>.csv"
21 headers:
22 A list or tuple of strings which will be used as the column
23 headers. Attempts to write a row of data will induce a check
24 that the length of the data provided is exactly that of this
25 argument.
26 max_rows:
27 The maximum number of rows to write in each file (including the
28 header row) before opening a new file.
29
30 _fmt:
31 The format string which will be used to write a row to the csv
32 file. Of form '{},{},...,{}\n'.
33 _file:
34 The currently open file.
35 _file_count:
36 The number of files that the CSVWriter has written to. The next
37 file to be opened will have name '<base_path><_file_count>.csv'
38 _row_count:
39 The number of rows (including the header row) in the current file.
40 _total_row_count:
41 The number of rows (including the header rows) in ALL files.
42
43 """
44 # Filename fmt of output files - used with .format(base_path, number).
45 _file_fmt = '{}{}.csv'
46
47 def __init__(self, base_path, headers, max_rows):
48 """
49 Initialise the writer for writing.
50
51 Arguments:
52 base_path:
53 The base path of the output files - which will have a full path
54 of form "<base_path><number>.csv"
55 headers:
56 A list or tuple of strings which will be used as the column
57 headers. Attempts to write a row of data will induce a check
58 that the length of the data provided is exactly that of this
59 argument.
60 max_rows:
61 The maximum number of rows to write in each file (including the
62 header row) before opening a new file.
63 """
64 self.base_path = base_path
65 self.headers = headers
66 self.max_rows = max_rows
67
68 self._fmt = ','.join('{}' for h in headers) + '\n'
69
70 # The number of the file we are on and the line in it.
71 self._file = None
72 self._file_count = 0
73 self._row_count = 0
74
75 self._total_row_count = 0
76
77 self._open_new_file()
78
79 def _open_new_file(self):
80 """
81 Open a new file for editing, writing the headers in the first row.
82 """
83 self._close_file()
84
85 path = CSVWriter._file_fmt.format(self.base_path, self._file_count)
86 self._file = open(path, 'w+')
87 self._file_count += 1
88 self.write(*self.headers)
89
90 def _close_file(self):
91 """
92 Close the current file.
93
94 NOTE that this method should ALWAYS be called before attempting to read
95 from the file as it ensures that all changes have been written to disk,
96 not only buffered.
97 """
98 if self._file and not self._file.closed:
99 logging.debug('csvwriter wrote {} lines to {}'
100 .format(self._row_count, self._file.name))
101 self._file.close()
102
103 self._row_count = 0
104
105 def write(self, *args):
106 """
107 Add a row the to current file, or to a new one if max_rows is reached.
108
109 The check against max_rows is made BEFORE writing the line.
110
111 Raises:
112 ValueError:
113 If the length of *args is not exactly the length of
114 self.headers - i.e. on attempt to write too many/too few items.
115
116 Arguments:
117 *args:
118 Strings, which will be written into the columns of the current
119 open csv file.
120 """
121 if not len(args) == len(self.headers):
122 msg = 'Attempted to write {} entries to file {} with {} columns'
123 raise ValueError(msg.format(len(args), self.base_path,
124 len(self.headers)))
125
126 if self._row_count == self.max_rows:
127 self._close_file()
128 self._open_new_file()
129
130 self._file.write(self._fmt.format(*args))
131 self._row_count += 1
132 self._total_row_count += 1
133
134 def file_iter(self):
135 """
136 Return an iterator over the names of the files the writer has
137 written to.
138 """
139 fmt = CSVWriter._file_fmt
140 return (fmt.format(self.base_path, i) for i in range(self._file_count))
141
142 def finish(self):
143 """
144 Flush and close the current file. If a subsequent call to self.write
145 is made, a new file will be created to contain it.
146
147 Return the number of files we have written to and the total number
148 of lines we have written.
149 """
150 self._close_file()
151 return self._file_count, self._total_row_count
152
0153
=== modified file 'src/sextant/db_api.py'
--- src/sextant/db_api.py 2014-09-03 14:10:07 +0000
+++ src/sextant/db_api.py 2014-10-23 12:33:12 +0000
@@ -5,208 +5,348 @@
5# -----------------------------------------5# -----------------------------------------
6# API to interact with a Neo4J server: upload, query and delete programs in a DB6# API to interact with a Neo4J server: upload, query and delete programs in a DB
77
8__all__ = ("Validator", "AddToDatabase", "FunctionQueryResult", "Function",8from __future__ import print_function
9
10__all__ = ("validate_query", "DBProgram", "FunctionQueryResult", "Function",
9 "SextantConnection")11 "SextantConnection")
1012
13from sys import stdout
14
11import re # for validation of function/program names15import re # for validation of function/program names
12import logging16import logging
13from datetime import datetime17from datetime import datetime
14import os18import os
15import getpass19import getpass
16from collections import namedtuple20from collections import namedtuple
1721import random
18from neo4jrestclient.client import GraphDatabase22import socket
19import neo4jrestclient.client as client23
2024import itertools
25import subprocess
26from time import time
27
28import neo4jrestclient.client as neo4jrestclient
29
30from sshmanager import SSHManager, SSHConnectionError
31from csvwriter import CSVWriter
32
33# The directory on the local machine to which csv files will be written
34# prior to copy over to the remote server.
35TMP_DIR = '/tmp/sextant'
36
37# A function is deemed 'common' if it has more than this
38# many connections.
21COMMON_CUTOFF = 1039COMMON_CUTOFF = 10
22# a function is deemed 'common' if it has more than this40
23# many connections41
2442
2543def set_common_cutoff(common_def):
26class Validator():44 """
27 """ Sanitises/checks strings, to prevent Cypher injection attacks"""45 Sets the number of incoming connections at which we deem a function 'common'
2846 Default is 10 (which is used if this method is never called).
29 @staticmethod47 :param common_def: number of incoming connections
30 def validate(input_):48 """
31 """49 global COMMON_CUTOFF
32 Checks whether we can allow a string to be passed into a Cypher query.50 COMMON_CUTOFF = common_def
33 :param input_: the string we wish to validate51
34 :return: bool(the string is allowed)52
35 """53def validate_query(string):
36 regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')54 """
37 return bool(regex.match(input_))55 Checks whether we can allow a string to be passed into a Cypher query.
3856 :param string: the string we wish to validate
39 @staticmethod57 :return: bool(the string is allowed)
40 def sanitise(input_):58 """
41 """59 regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')
42 Strips harmful characters from the given string.60 return bool(regex.match(string))
43 :param input_: string to sanitise61
44 :return: the sanitised string62
45 """63class DBProgram(object):
46 return re.sub(r'[^\.\-_a-zA-Z0-9]+', '', input_)64 """
4765 Representation of a program in the database.
4866
49class AddToDatabase():67 Provides add_function and add_call methods which locally register functions
50 """Updates the database, adding functions/calls to a given program"""68 and calls. The commit method uploads everything to the database.
5169
52 def __init__(self, program_name='', sextant_connection=None,70 Attributes:
53 uploader='', uploader_id='', date=None):71 uploader, uploader_id, program_name, date:
54 """72 As in __init__.
55 Object which can be used to add functions and calls to a new program73
56 :param program_name: the name of the new program to be created74 _conn:
57 (must already be validated against Validator)75 The SextantConnection object managing the database connection.
58 :param sextant_connection: the SextantConnection to use for connections76 _ssh:
59 :param uploader: string identifier of user who is uploading77 The SSHManager object belonging to the SextantConnection.
60 :param uploader_id: string Unix user-id of logged-in user78 _db:
61 :param date: string date of today79 The database object belonging to the SextantConnection.
62 """80
63 # program_name must be alphanumeric, to avoid injection attacks easily81 _tmp_dir:
64 if not Validator.validate(program_name):82 The user-specific location of the local temporary directory.
65 return83
84 func_writer:
85 A CSVWriter object which manages the csv files containing the
86 list of functions in the program.
87 call_writer:
88 A CSVWriter object which manages the csv files containing the
89 list of function calls in the program.
90
91 add_func_query:
92 A string for the cypher query used to create functions from a csv
93 file.
94 add_call_query:
95 A string for the cypher query used to create funciton calls from
96 a csv file.
97 add_program_query:
98 A string for the cypher query used to create the program node.
99 """
100
101 def __init__(self, connection, program_name, uploader, uploader_id, date):
102 """
103 Initialise the database program.
104
105 A local temporary folder is created at 'TMP_DIR-<user_name>'.
106 When functions or calls are added via the add_function/call methods,
107 they are registered in csv files which are stored in this directory.
108
109 Committing the program copies these files to the neo4j server and
110 cleans the local tmp folder.
111
112 Raises:
113 ValueError:
114 If the program_name is not alphanumeric.
115 CommandError:
116 If the command to create the temporary directory failed.
117
118 Arguments:
119 connection:
120 The SextantConnection object which manages the connection to
121 the database.
122 program_name:
123 The name to register the program under in the database. Must be
124 alphanumeric.
125 uploader:
126 The name of the user who uploaded the program.
127 uploader_id:
128 A numeric id of the user who uploaded the program.
129 date:
130 A string representing the upload date.
131 """
132 # Ensure an alphanumeric program name.
133 if not validate_query(program_name):
134 raise ValueError('program name must be alphanumeric, got: {}'
135 .format(program_name));
136
137 self.uploader = uploader
138 self.uploader_id = uploader_id
66139
67 self.program_name = program_name140 self.program_name = program_name
68 self.parent_database_connection = sextant_connection141 self.date = date
69 self._functions = {}142
70 self._funcs_tx = None # transaction for uploading functions143 self._conn = connection
71 self._calls_tx = None # transaction for uploading relationships144 self._ssh = connection._ssh
72145 self._db = connection._db
73 if self.parent_database_connection:146
74 # we'll locally use db for short147 self._tmp_dir = '{}-{}'.format(TMP_DIR, getpass.getuser())
75 db = self.parent_database_connection._db148
76149 # Make the local tmp file - csv files will be written into here.
77 parent_function = db.nodes.create(name=program_name,150 try:
78 type='program',151 os.makedirs(self._tmp_dir)
79 uploader=uploader,152 except OSError as e:
80 uploader_id=uploader_id,153 if e.errno == os.errno.EEXIST: # File already exists.
81 date=date)154 pass
82 self._parent_id = parent_function.id155 else:
83156 raise e
84 self._funcs_tx = db.transaction(using_globals=False, for_query=True)157
85 self._calls_tx = db.transaction(using_globals=False, for_query=True)158
86159 tmp_path = os.path.join(self._tmp_dir, '{}_{{}}'.format(program_name))
87 self._connections = []160
88161 self.func_writer = CSVWriter(tmp_path.format('funcs'),
89 @staticmethod162 headers=['name', 'type'],
90 def _get_display_name(function_name):163 max_rows=5000)
91 """164 self.call_writer = CSVWriter(tmp_path.format('calls'),
92 Gets the name we will display to the user for this function name.165 headers=['caller', 'callee'],
93166 max_rows=5000)
94 For instance, if function_name were __libc_start_main@plt, we would167
95 return ("__libc_start_main", "plt_stub"). The returned function type is168 # Define the queries we use to upload the functions and calls.
96 currently one of "plt_stub", "function_pointer" or "normal".169 self.add_func_query = (' USING PERIODIC COMMIT 250'
97170 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
98 :param function_name: the name straight from objdump of a function171 ' WITH line, toInt(line.id) as lineid'
99 :return: ("display name", "function type")172 ' MATCH (n:program {{name: "{}"}})'
100173 ' CREATE (n)-[:subject]->(m:func {{name: line.name,'
101 """174 ' id: lineid, type: line.type}})')
102175
103 if function_name[-4:] == "@plt":176 self.add_call_query = (' USING PERIODIC COMMIT 250'
104 display_name = function_name[:-4]177 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
105 function_group = "plt_stub"178 ' MATCH (p:program {{name: "{}"}})'
106 elif function_name[:20] == "_._function_pointer_":179 ' MATCH (p)-[:subject]->(n:func {{name: line.caller}})'
107 display_name = function_name180 ' USING INDEX n:func(name)'
108 function_group = "function_pointer"181 ' MATCH (p)-[:subject]->(m:func {{name: line.callee}})'
109 else:182 ' USING INDEX m:func(name)'
110 display_name = function_name183 ' CREATE (n)-[r:calls]->(m)')
111 function_group = "normal"184
112185 self.add_program_query = ('CREATE (p:program {{name: "{}", uploader: "{}", '
113 return display_name, function_group186 ' uploader_id: "{}", date: "{}",'
114187 ' function_count: {}, call_count: {}}})')
115 def add_function(self, function_name):188
116 """189
117 Adds a function to the program, ready to be sent to the remote database.190 def __enter__(self):
118 If the function name is already in use, this method effectively does191 """
119 nothing and returns True.192 Allow DBProgram to be used as a context manager.
120193 """
121 :param function_name: a string which must be alphanumeric194 return self
122 :return: True if the request succeeded, False otherwise195
123 """196 def __exit__(self, etype, evalue, etrace):
124 if not Validator.validate(function_name):197 """
125 return False198 Make sure that all files are properly closed.
126 if self.class_contains_function(function_name):199 """
127 return True200 self.func_writer.finish()
128201 self.call_writer.finish()
129 display_name, function_group = self._get_display_name(function_name)202
130203 # Propagate the error if there is one.
131 query = ('START n = node({}) '204 return False if etype is not None else True
132 'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}}) '205
133 'RETURN m.name, id(m)')206 def add_function(self, name, typ='normal'):
134 query = query.format(self._parent_id, function_group, display_name)207 """
135208 Add a function.
136 self._funcs_tx.append(query)209
137210 Arguments:
138 self._functions[function_name] = function_name211 name:
139212 The name of the function.
140 return True213 typ:
141214 The type of the function, may be any string, but standard types
142 def class_contains_function(self, function_to_find):215 are:
143 """216 normal: we have the disassembly for this function
144 Checks whether we contain a function with a given name.217 stub: we have the name but not the disassembly - usually
145 :param function_to_find: string name of the function we wish to look up218 an imported library function.
146 :return: bool(the function exists in this AddToDatabase)219 pointer: we know only that the function exists, not its
147 """220 name or details.
148 return function_to_find in self._functions221 """
149222 self.func_writer.write(name, typ)
150 def class_contains_call(self, function_calling, function_called):223
151 """224 def add_call(self, caller, callee):
152 Checks whether we contain a call between the two named functions.225 """
153 :param function_calling: string name of the calling-function226 Add a function call.
154 :param function_called: string name of the called function227
155 :return: bool(function_calling calls function_called in us)228 Arguments:
156 """229 caller:
157 return (function_calling, function_called) in self._connections230 The name of the function making the call.
158231 callee:
159 def add_function_call(self, fn_calling, fn_called):232 The name of the function called.
160 """233 """
161 Adds a function call to the program, ready to be sent to the database.234 self.call_writer.write(caller, callee)
162 Effectively does nothing if there is already a function call between235
163 these two functions.236
164 Function names must be alphanumeric for easy security purposes;237 def _copy_local_to_remote_tmp_dir(self):
165 returns False if they fail validation.238 """
166 :param fn_calling: the name of the calling-function as a string.239 Move local tmp files to the server ready for upload.
167 It should already exist in the AddToDatabase; if it does not,240
168 this method will create a stub for it.241 Return a tuple of iterators, the first over the paths on the remote
169 :param fn_called: name of the function called by fn_calling.242 machine of the function files, and the second over the paths of the
170 If it does not exist, we create a stub representation for it.243 call files.
171 :return: True if successful, False otherwise244 """
172 """245 print('Sending files to remote server...', end='')
173 if not all((Validator.validate(fn_calling),246 stdout.flush()
174 Validator.validate(fn_called))):247 remote_funcs = self._ssh.send_to_tmp_dir(self.func_writer.file_iter())
175 return False248 remote_calls = self._ssh.send_to_tmp_dir(self.call_writer.file_iter())
176249 print('finished.')
177 if not self.class_contains_function(fn_called):250 return remote_funcs, remote_calls
178 self.add_function(fn_called)251
179 if not self.class_contains_function(fn_calling):252 def _clean_tmp_files(self, remote_paths):
180 self.add_function(fn_calling)253 """
181254 Delete temporary files on the local and remote machine.
182 if not self.class_contains_call(fn_calling, fn_called):255
183 self._connections.append((fn_calling, fn_called))256 Arguments:
184257 remote_paths:
185 return True258 A list of the paths of the remote files.
259 """
260 print('Cleaning temporary files...', end='')
261 file_paths = list(itertools.chain(self.func_writer.file_iter(),
262 self.call_writer.file_iter()))
263
264 for path in file_paths:
265 os.remove(path)
266
267 os.rmdir(self._tmp_dir)
268
269 try:
270 # If the parent sextant temp folder is empty, remove it.
271 os.rmdir(TMP_DIR)
272 except:
273 # There is other stuff in TMP_DIR (i.e. from other users), so
274 # leave it.
275 pass
276
277 self._ssh.remove_from_tmp_dir(remote_paths)
278
279 print('done.')
280
281 def _create_db_constraints(self):
282 """
283 Create indexes in the database on program and function names.
284
285 The program name index is a constraint, which will also guarantee the
286 uniqueness of program names.
287 """
288 # Prepare a transaction object which we use to execute cypher queries.
289 tx = self._db.transaction(using_globals=False, for_query=True)
290
291 tx.append('CREATE CONSTRAINT ON (p:program) ASSERT p.name IS UNIQUE')
292 tx.append('CREATE INDEX ON :func(name)')
293
294 # Apply the transaction.
295 tx.commit()
186296
187 def commit(self):297 def commit(self):
188 """298 """
189 Call this when you are finished with the object.299 Insert the program into the database.
190 Changes are not synced to the remote database until this is called.300
301 Move the local temp files created by our func_writer and call_writer
302 to the database server's temp directory. From there use cypher queries
303 to upload them into the database, before cleaning them up.
191 """304 """
192 functions = self._funcs_tx.commit() # send off the function names305 # Ensure that the most recent files are flushed and closed.
193306 func_file_count, func_line_count = self.func_writer.finish()
194 # now functions is a list of QuerySequence objects, which each have a307 call_file_count, call_line_count = self.call_writer.finish()
195 # .elements property which produces [['name', id]]308
196309 # Account for the header line at the top of each file.
197 id_funcs = dict([seq.elements[0] for seq in functions])310 func_count = func_line_count - func_file_count
198 logging.info('Functions uploaded. Uploading calls...')311 call_count = call_line_count - call_file_count
199312
200 # so id_funcs is a dict with id_funcs['name'] == id313 # Get the remote path names as iterators, then make lists of them
201 for call in self._connections:314 # so that we can iterate over them more than once.
202 query = ('MATCH n WHERE id(n) = {} '315 remote_f_iter, remote_c_iter = self._copy_local_to_remote_tmp_dir()
203 'MATCH m WHERE id(m) = {} '316 remote_funcs, remote_calls = map(list, (remote_f_iter, remote_c_iter))
204 'CREATE (n)-[:calls]->(m)')317
205 query = query.format(id_funcs[self._get_display_name(call[0])[0]],318 # Create the indexes and constraints in the database.
206 id_funcs[self._get_display_name(call[1])[0]])319 self._create_db_constraints()
207 self._calls_tx.append(query)320
208321
209 self._calls_tx.commit()322 try:
323 tx = self._db.transaction(using_globals=False, for_query=True)
324
325 # Create the program node in the database.
326 tx.append(self.add_program_query.format(self.program_name, self.uploader,
327 self.uploader_id, self.date,
328 func_count, call_count))
329 tx.commit()
330
331 # Create the functions.
332 for files, query, descr in zip((remote_funcs, remote_calls),
333 (self.add_func_query, self.add_call_query),
334 ('funcs', 'calls')):
335 start = time()
336 for i, path in enumerate(files):
337 completed = int(100*float(i+1)/len(files))
338
339 print('\rUploading {}: {}%'.format(descr, completed), end='')
340 stdout.flush()
341
342 tx.append(query.format(path, self.program_name))
343 tx.commit()
344 end = time()
345 print(' done.')
346
347 finally:
348 # Cleanup temporary folders
349 self._clean_tmp_files(remote_funcs + remote_calls)
210350
211351
212class FunctionQueryResult:352class FunctionQueryResult:
@@ -219,7 +359,7 @@
219 self._update_common_functions()359 self._update_common_functions()
220360
221 def __eq__(self, other):361 def __eq__(self, other):
222 # we make a dictionary so that we can perform easy comparison362 # We make a dictionary so that we can perform easy comparison.
223 selfdict = {func.name: func for func in self.functions}363 selfdict = {func.name: func for func in self.functions}
224 otherdict = {func.name: func for func in other.functions}364 otherdict = {func.name: func for func in other.functions}
225365
@@ -243,20 +383,20 @@
243 if rest_output is None or not rest_output.elements:383 if rest_output is None or not rest_output.elements:
244 return []384 return []
245385
246 # how we store this is: a dict386 # How we store this is: a dict
247 # with keys 'functionname'387 # with keys 'functionname'
248 # and values [the function object we will use,388 # and values [the function object we will use,
249 # and a set of (function names this function calls),389 # and a set of (function names this function calls),
250 # and numeric ID of this node in the Neo4J database]390 # and numeric ID of this node in the Neo4J database].
251391
252 result = {}392 result = {}
253393
254 # initial pass for names of functions394 # Initial pass for names of functions.
255395
256 # if the following assertion failed, we've probably called db.query396 # If the following assertion failed, we've probably called db.query
257 # to get it to not return client.Node objects, which is wrong.397 # to get it to not return client.Node objects, which is wrong.
258 # we attempt to handle this a bit later; this should never arise, but398 # we attempt to handle this a bit later; this should never arise, but
259 # we can cope with it happening in some cases, like the test suite399 # we can cope with it happening in some cases, like the test suite.
260400
261 if type(rest_output.elements) is not list:401 if type(rest_output.elements) is not list:
262 logging.warning('Not a list: {}'.format(type(rest_output.elements)))402 logging.warning('Not a list: {}'.format(type(rest_output.elements)))
@@ -264,11 +404,12 @@
264 for node_list in rest_output.elements:404 for node_list in rest_output.elements:
265 assert(isinstance(node_list, list))405 assert(isinstance(node_list, list))
266 for node in node_list:406 for node in node_list:
267 if isinstance(node, client.Node):407 if isinstance(node, neo4jrestclient.Node):
268 name = node.properties['name']408 name = node.properties['name']
269 node_id = node.id409 node_id = node.id
270 node_type = node.properties['type']410 node_type = node.properties['type']
271 else: # this is the handling we mentioned earlier;411 else:
412 # This is the handling we mentioned earlier;
272 # we are a dictionary instead of a list, as for some413 # we are a dictionary instead of a list, as for some
273 # reason we've returned Raw rather than Node data.414 # reason we've returned Raw rather than Node data.
274 # We should never reach this code, but just in case.415 # We should never reach this code, but just in case.
@@ -283,7 +424,7 @@
283 set(),424 set(),
284 node_id]425 node_id]
285426
286 # end initialisation of names-dictionary427 # End initialisation of names-dictionary.
287428
288 if self._parent_db_connection is not None:429 if self._parent_db_connection is not None:
289 # This is the normal case, of extracting results from a server.430 # This is the normal case, of extracting results from a server.
@@ -301,7 +442,7 @@
301 logging.debug('exec')442 logging.debug('exec')
302 results = new_tx.execute()443 results = new_tx.execute()
303444
304 # results is a list of query results, each of those being a list of445 # Results is a list of query results, each of those being a list of
305 # calls.446 # calls.
306447
307 for call_list in results:448 for call_list in results:
@@ -315,7 +456,7 @@
315 # recall: set union is denoted by |456 # recall: set union is denoted by |
316457
317 else:458 else:
318 # we don't have a parent database connection.459 # We don't have a parent database connection.
319 # This has probably arisen because we created this object from a460 # This has probably arisen because we created this object from a
320 # test suite, or something like that.461 # test suite, or something like that.
321 for node in rest_output.elements:462 for node in rest_output.elements:
@@ -353,19 +494,10 @@
353 func_list = [func for func in self.functions if func.name == name]494 func_list = [func for func in self.functions if func.name == name]
354 return None if len(func_list) == 0 else func_list[0]495 return None if len(func_list) == 0 else func_list[0]
355496
356
357def set_common_cutoff(common_def):
358 """
359 Sets the number of incoming connections at which we deem a function 'common'
360 Default is 10 (which is used if this method is never called).
361 :param common_def: number of incoming connections
362 """
363 global COMMON_CUTOFF
364 COMMON_CUTOFF = common_def
365
366
367class Function(object):497class Function(object):
368 """Represents a function which might appear in a FunctionQueryResult."""498 """
499 Represents a function which might appear in a FunctionQueryResult.
500 """
369501
370 def __eq__(self, other):502 def __eq__(self, other):
371 funcs_i_call_list = {func.name for func in self.functions_i_call}503 funcs_i_call_list = {func.name for func in self.functions_i_call}
@@ -393,11 +525,11 @@
393 self.name = function_name525 self.name = function_name
394 self.is_common = False526 self.is_common = False
395 self._number_calling_me = 0527 self._number_calling_me = 0
396 # care: _number_calling_me is not automatically updated, except by528 # Care: _number_calling_me is not automatically updated, except by
397 # any invocation of FunctionQueryResult._update_common_functions.529 # any invocation of FunctionQueryResult._update_common_functions.
398530
399531
400class SextantConnection:532class SextantConnection(object):
401 """533 """
402 RESTful connection to a remote database.534 RESTful connection to a remote database.
403 It can be used to create/delete/query programs.535 It can be used to create/delete/query programs.
@@ -406,56 +538,214 @@
406 ProgramWithMetadata = namedtuple('ProgramWithMetadata',538 ProgramWithMetadata = namedtuple('ProgramWithMetadata',
407 ['uploader', 'uploader_id',539 ['uploader', 'uploader_id',
408 'program_name', 'date', 540 'program_name', 'date',
409 'number_of_funcs'])541 'number_of_funcs', 'number_of_calls'])
410542
411 def __init__(self, url):543 @staticmethod
412 self.url = url544 def _is_localhost(host, port):
413 self._db = GraphDatabase(url)545 """
414546 Checks whether a host is an alias to localhost.
415 def new_program(self, name_of_program):547
548 Raises socket.gaierror if the host was not found.
549 """
550 addr = socket.getaddrinfo(host, port)[0][4][0]
551 return addr in ('127.0.0.1', '::1')
552
553 @staticmethod
554 def _is_port_used(port):
555 """
556 Checks with the OS to see whether a port is open.
557
558 Beware: port is passed directly to the shell. Make sure it is an integer.
559 We raise ValueError if it is not.
560 :param port: integer port to check for openness
561 :return: bool(port is in use)
562 """
563 result = False
564
565 # We follow:
566 # http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
567 if not (isinstance(port, int) and port > 0):
568 raise ValueError('port {} must be a positive integer.'.format(port))
569
570 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
571 try:
572 sock.bind(('127.0.0.1', port))
573 except socket.error as e:
574 if e.errno == os.errno.EADDRINUSE:
575 result = True
576 else:
577 raise
578
579 sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
580
581 return result # that is, the port is not used
582
583 @staticmethod
584 def _get_unused_port():
585 """
586 Returns a port number between 10000 and 50000 which is not currently open.
587 """
588
589 keep_going = True
590 while keep_going:
591 portnum = random.randint(10000, 50000)
592 keep_going = SextantConnection._is_port_used(portnum)
593 return portnum
594
595
596 def __enter__(self):
597 return self
598
599 def __exit__(self, etype, evalue, etrace):
600 self.close()
601 return False if etype is not None else True
602
603
604 def __init__(self, remotehost, remoteport, no_ssh_tunnel=False):
605 """
606 Initialise the database and ssh connections.
607
608 Arguments:
609 remotehost:
610 The remote host name to connect to.
611 remoteport:
612 The port number to connect to on the remote host.
613 no_ssh_tunnel:
614 Disables the SSHManager if True. Prevents program upload.
615 """
616
617 self.remote_host = remotehost
618 self.remote_port = remoteport
619
620
621 self._no_ssh_tunnel = no_ssh_tunnel
622 self._ssh = None
623 self._db = None
624
625 self.open()
626
627 def open(self):
628 local_port = SextantConnection._get_unused_port()
629 is_localhost = SextantConnection._is_localhost(self.remote_host, self.remote_port)
630
631 if self._no_ssh_tunnel and not is_localhost:
632 raise SSHConnectionError('Cannot connect to the remote database '
633 'without an ssh connection.')
634 else:
635 # Either we are making an ssh tunnel or we are contacting localhost.
636 self._ssh = SSHManager(local_port,
637 self.remote_host,
638 self.remote_port,
639 is_localhost=is_localhost)
640
641 port = self.remote_port if is_localhost else local_port
642 url = 'http://localhost:{}'.format(port)
643
644 self._db = neo4jrestclient.GraphDatabase(url)
645
646 def close(self):
647 """
648 Close the ssh connection to clean up its resources.
649 """
650 if self._ssh:
651 self._ssh.close()
652
653 def new_program(self, program_name):
416 """654 """
417 Request that the remote database create a new program with the given name.655 Request that the remote database create a new program with the given name.
418 This procedure will create a new program remotely; you can manipulate656 This procedure will create a new program remotely; you can manipulate
419 that program using the returned AddToDatabase object.657 that program using the returned DBProgram object.
420 The name can appear in the database already, but this is not recommended658 The name can appear in the database already, but this is not recommended
421 because then delete_program will not know which to delete. Check first659 because then delete_program will not know which to delete. Check first
422 using self.check_program_exists.660 using self.check_program_exists.
423 The name specified must pass Validator.validate()ion; this is a measure661 The name specified must pass validate_query()ion; this is a measure
424 to prevent Cypher injection attacks.662 to prevent Cypher injection attacks.
425 :param name_of_program: string program name663 :param program_name: string program name
426 :return: AddToDatabase instance if successful664 :return: DBProgram instance if successful
427 """665 """
428666
429 if not Validator.validate(name_of_program):667 if not validate_query(program_name):
430 raise ValueError(668 raise ValueError("{} is not a valid program name"
431 "{} is not a valid program name".format(name_of_program))669 .format(program_name))
432 670
433 uploader = getpass.getuser()671 uploader = getpass.getuser()
434 uploader_id = os.getuid()672 uploader_id = os.getuid()
435673 timestr = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
436 return AddToDatabase(sextant_connection=self,674
437 program_name=name_of_program,675 return DBProgram(self, program_name, uploader,
438 uploader=uploader, uploader_id=uploader_id,676 uploader_id, date=timestr)
439 date=str(datetime.now()))677
440678 def delete_program(self, program_name):
441 def delete_program(self, name_of_program):
442 """679 """
443 Request that the remote database delete a specified program.680 Request that the remote database delete a specified program.
444 :param name_of_program: a string which must be alphanumeric only681 :param program_name: a string which must be alphanumeric only
445 :return: bool(request succeeded)682 :return: bool(request succeeded)
446 """683 """
447 if not Validator.validate(name_of_program):684 if not program_name in self.get_program_names():
448 return False685 print('No program `{}` in the database'.format(program_name))
449686 return True
450 q = """MATCH (n) WHERE n.name= "{}" AND n.type="program"687 else:
451 OPTIONAL MATCH (n)-[r]-(b) OPTIONAL MATCH (b)-[rel]-()688 print('Deleting `{}` from the database. '
452 DELETE b,rel DELETE n, r""".format(name_of_program)689 'This may take some time for larger programs.'
453690 .format(program_name))
454 self._db.query(q)691
692 start = time()
693 tx = self._db.transaction(using_globals=False, for_query=True)
694
695 count_query = (' MATCH (p:program {{name: "{}"}})'
696 ' RETURN p.function_count, p.call_count'
697 .format(program_name))
698
699 tx.append(count_query)
700 func_count, call_count = tx.commit()[0].elements[0]
701
702 del_call_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
703 '-[:subject]->(f:func)-[c:calls]->()'
704 ' WITH c LIMIT 5000 DELETE c RETURN count(distinct(c))'
705 .format(program_name))
706
707 del_func_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
708 '-[s:subject]->(f:func)'
709 ' WITH s, f LIMIT 5000 DELETE s, f RETURN count(f)'
710 .format(program_name))
711
712 del_prog_query = ('MATCH (p:program {{name: "{}"}}) DELETE p'
713 .format(program_name))
714
715 # Delete calls first, a node may not be deleted until all relationships
716 # referencing it are deleted.
717 for count, query, descr in zip((call_count, func_count),
718 (del_call_query, del_func_query),
719 ('calls', 'funcs')):
720 # Change tracks whether the last delete did anything. We would
721 # like to use: while done < count: ..., but if the program has
722 # already been partially deleted then this will never terminate.
723 # Furthermore, if there are no functions or no calls, the while
724 # loop will be appropriately skipped.
725 change = count
726 done = 0
727 while change:
728 completed = int(100 * float(done)/count)
729 print('\rDeleting {}: {}%'.format(descr, completed), end='')
730 stdout.flush()
731
732 tx.append(query)
733 change = tx.commit()[0].elements[0][0]
734 done += change
735 if done:
736 print(' done.')
737
738 # Delete the program node.
739 tx.append(del_prog_query)
740 tx.commit()
741
742 end = time()
743 print('Finished in {:.2f}s.'.format(end - start))
455744
456 return True745 return True
457746
458 def _execute_query(self, prog_name='', query=''):747
748 def _execute_query(self, prog_name, query):
459 """749 """
460 Executes a Cypher query against the remote database.750 Executes a Cypher query against the remote database.
461 Note that this returns a FunctionQueryResult, so is unsuitable for any751 Note that this returns a FunctionQueryResult, so is unsuitable for any
@@ -468,7 +758,7 @@
468 :param query: verbatim query we wish the server to execute758 :param query: verbatim query we wish the server to execute
469 :return: a FunctionQueryResult corresponding to the server's output759 :return: a FunctionQueryResult corresponding to the server's output
470 """760 """
471 rest_output = self._db.query(query, returns=client.Node)761 rest_output = self._db.query(query, returns=neo4jrestclient.Node)
472762
473 return FunctionQueryResult(parent_db=self._db,763 return FunctionQueryResult(parent_db=self._db,
474 program_name=prog_name,764 program_name=prog_name,
@@ -481,12 +771,11 @@
481 method which requires a program-name input.771 method which requires a program-name input.
482 :return: a list of function-name strings.772 :return: a list of function-name strings.
483 """773 """
484 q = """MATCH (n) WHERE n.type = "program" RETURN n.name"""774 q = 'MATCH (n:program) RETURN n.name'
485 program_names = self._db.query(q, returns=str).elements775 program_names = self._db.query(q, returns=str).elements
486776
487 result = [el[0] for el in program_names]777 return set(el[0] for el in program_names)
488778
489 return set(result)
490779
491 def programs_with_metadata(self):780 def programs_with_metadata(self):
492 """781 """
@@ -498,27 +787,28 @@
498 787
499 """788 """
500 789
501 q = ("MATCH (base) WHERE base.type = 'program' "790 q = (' MATCH (p:program)'
502 "MATCH (base)-[:subject]->(n)"791 ' RETURN p.uploader, p.uploader_id, p.name, p.date,'
503 "RETURN base.uploader, base.uploader_id, base.name, base.date, count(n)")792 ' p.function_count, p.call_count')
504 result = self._db.query(q)793 result = self._db.query(q)
505 return {self.ProgramWithMetadata(*res) for res in result}794 return {self.ProgramWithMetadata(*res) for res in result}
506795
507 def check_program_exists(self, program_name):796 def check_program_exists(self, program_name):
508 """797 """
509 Execute query to check whether a program with the given name exists.798 Execute query to check whether a program with the given name exists.
510 Returns False if the program_name fails validation against Validator.799 Returns False if the program_name fails validation (i.e. is possibly
800 unsafe as a string in a cypher query).
511 :return: bool(the program exists in the database).801 :return: bool(the program exists in the database).
512 """802 """
513803
514 if not Validator.validate(program_name):804 if not validate_query(program_name):
515 return False805 return False
516806
517 q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "807 q = ('MATCH (p:program {{name: "{}"}}) RETURN p LIMIT 1'
518 "RETURN count(base)").format(program_name)808 .format(program_name))
519809
520 result = self._db.query(q, returns=int)810 result = self._db.query(q, returns=neo4jrestclient.Node)
521 return result.elements[0][0] > 0811 return bool(result)
522812
523 def check_function_exists(self, program_name, function_name):813 def check_function_exists(self, program_name, function_name):
524 """814 """
@@ -529,18 +819,18 @@
529 :param function_name: string name of the function to check for existence819 :param function_name: string name of the function to check for existence
530 :return: bool(names validate correctly, and function exists in program)820 :return: bool(names validate correctly, and function exists in program)
531 """821 """
532 if not self.check_program_exists(program_name):822 if not validate_query(program_name):
533 return False823 return False
534824
535 if not Validator.validate(program_name):825 pmatch = '(:program {{name: "{}"}})'.format(program_name)
536 return False826 fmatch = '(f:func {{name: "{}"}})'.format(function_name)
537827 # be explicit about index usage
538 q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program'"828 q = (' MATCH {}-[:subject]->{} USING INDEX f:func(name)'
539 "MATCH (base)-[r:subject]->(m) WHERE m.name = '{}'"829 ' RETURN f LIMIT 1'.format(pmatch, fmatch))
540 "RETURN count(m)").format(program_name, function_name)830
541831 # result will be an empty list if the function was not found
542 result = self._db.query(q, returns=int)832 result = self._db.query(q, returns=neo4jrestclient.Node)
543 return result.elements[0][0] > 0833 return bool(result)
544834
545 def get_function_names(self, program_name):835 def get_function_names(self, program_name):
546 """836 """
@@ -552,12 +842,11 @@
552 a set of function-name strings otherwise.842 a set of function-name strings otherwise.
553 """843 """
554844
555 if not self.check_program_exists(program_name):845 if not validate_query(program_name):
556 return None846 return set()
557847
558 q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "848 q = (' MATCH (:program {{name: "{}"}})-[:subject]->(f:func)'
559 "MATCH (base)-[r:subject]->(m) "849 ' RETURN f.name').format(program_name)
560 "RETURN m.name").format(program_name)
561 return {func[0] for func in self._db.query(q)}850 return {func[0] for func in self._db.query(q)}
562851
563 def get_all_functions_called(self, program_name, function_calling):852 def get_all_functions_called(self, program_name, function_calling):
@@ -570,16 +859,13 @@
570 :return: FunctionQueryResult, maximal subgraph rooted at function_calling859 :return: FunctionQueryResult, maximal subgraph rooted at function_calling
571 """860 """
572861
573 if not self.check_program_exists(program_name):
574 return None
575
576 if not self.check_function_exists(program_name, function_calling):862 if not self.check_function_exists(program_name, function_calling):
577 return None863 return None
578864
579 q = """MATCH (base) WHERE base.name = '{}' ANd base.type = 'program'865 q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
580 MATCH (base)-[:subject]->(m) WHERE m.name='{}'866 ' USING INDEX f:func(name)'
581 MATCH (m)-[:calls*]->(n)867 ' MATCH (f)-[:calls*]->(g) RETURN distinct f, g'
582 RETURN distinct n, m""".format(program_name, function_calling)868 .format(program_name, function_calling))
583869
584 return self._execute_query(program_name, q)870 return self._execute_query(program_name, q)
585871
@@ -593,16 +879,13 @@
593 :return: FunctionQueryResult, maximal connected subgraph with leaf function_called879 :return: FunctionQueryResult, maximal connected subgraph with leaf function_called
594 """880 """
595881
596 if not self.check_program_exists(program_name):
597 return None
598
599 if not self.check_function_exists(program_name, function_called):882 if not self.check_function_exists(program_name, function_called):
600 return None883 return None
601884
602 q = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'885 q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func {{name: "{}"}})'
603 MATCH (base)-[r:subject]->(m) WHERE m.name='{}'886 ' USING INDEX g:func(name)'
604 MATCH (n)-[:calls*]->(m) WHERE n.name <> '{}'887 ' MATCH (f)-[:calls*]->(g) WHERE f.name <> "{}"'
605 RETURN distinct n , m"""888 ' RETURN distinct f , g')
606 q = q.format(program_name, function_called, program_name)889 q = q.format(program_name, function_called, program_name)
607890
608 return self._execute_query(program_name, q)891 return self._execute_query(program_name, q)
@@ -628,12 +911,14 @@
628 if not self.check_function_exists(program_name, function_calling):911 if not self.check_function_exists(program_name, function_calling):
629 return None912 return None
630913
631 q = r"""MATCH (pr) WHERE pr.name = '{}' AND pr.type = 'program'914 q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(start:func {{name: "{}"}})'
632 MATCH p=(start {{name: "{}" }})-[:calls*]->(end {{name:"{}"}})915 ' USING INDEX start:func(name)'
633 WHERE (pr)-[:subject]->(start)916 ' MATCH (p)-[:subject]->(end:func {{name: "{}"}})'
634 WITH DISTINCT nodes(p) AS result917 ' USING INDEX end:func(name)'
635 UNWIND result AS answer918 ' MATCH path=(start)-[:calls*]->(end)'
636 RETURN answer"""919 ' WITH DISTINCT nodes(path) AS result'
920 ' UNWIND result AS answer'
921 ' RETURN answer')
637 q = q.format(program_name, function_calling, function_called)922 q = q.format(program_name, function_calling, function_called)
638923
639 return self._execute_query(program_name, q)924 return self._execute_query(program_name, q)
@@ -648,11 +933,9 @@
648 if not self.check_program_exists(program_name):933 if not self.check_program_exists(program_name):
649 return None934 return None
650935
651 query = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'936 q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func)'
652 MATCH (base)-[subject:subject]->(m)937 ' RETURN (f)'.format(program_name))
653 RETURN DISTINCT (m)""".format(program_name)938 return self._execute_query(program_name, q)
654
655 return self._execute_query(program_name, query)
656939
657 def get_shortest_path_between_functions(self, program_name, func1, func2):940 def get_shortest_path_between_functions(self, program_name, func1, func2):
658 """941 """
@@ -671,9 +954,11 @@
671 if not self.check_function_exists(program_name, func2):954 if not self.check_function_exists(program_name, func2):
672 return None955 return None
673956
674 q = """MATCH (func1 {{ name:"{}" }}),(func2 {{ name:"{}" }}),957 q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
675 p = shortestPath((func1)-[:calls*]->(func2))958 ' USING INDEX f:func(name)'
676 UNWIND nodes(p) AS ans959 ' MATCH (p)-[:subject]->(g:func {{name: "{}"}})'
677 RETURN ans""".format(func1, func2)960 ' MATCH path=shortestPath((f)-[:calls*]->(g))'
961 ' UNWIND nodes(path) AS ans'
962 ' RETURN ans'.format(program_name, func1, func2))
678963
679 return self._execute_query(program_name, q)964 return self._execute_query(program_name, q)
680965
=== modified file 'src/sextant/export.py'
--- src/sextant/export.py 2014-09-04 09:46:18 +0000
+++ src/sextant/export.py 2014-10-23 12:33:12 +0000
@@ -46,7 +46,7 @@
46 font_name = "Helvetica"46 font_name = "Helvetica"
4747
48 for func in program.get_functions():48 for func in program.get_functions():
49 if func.type == "plt_stub":49 if func.type == "stub":
50 output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name)50 output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name)
51 elif func.type == "function_pointer":51 elif func.type == "function_pointer":
52 output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name)52 output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name)
@@ -108,7 +108,7 @@
108108
109 for func in program.get_functions():109 for func in program.get_functions():
110 display_func = ProgramConverter.get_display_name(func)110 display_func = ProgramConverter.get_display_name(func)
111 if func.type == "plt_stub":111 if func.type == "stub":
112 colour = "#ff00ff"112 colour = "#ff00ff"
113 elif func.type == "function_pointer":113 elif func.type == "function_pointer":
114 colour = "#99ffff"114 colour = "#99ffff"
@@ -175,4 +175,4 @@
175 output_str += '<edge source="{}" target="{}"> <data key="calls">1</data> </edge>\n'.format(func.name, callee.name)175 output_str += '<edge source="{}" target="{}"> <data key="calls">1</data> </edge>\n'.format(func.name, callee.name)
176176
177 output_str += '</graph>\n</graphml>'177 output_str += '</graph>\n</graphml>'
178 return output_str
179\ No newline at end of file178\ No newline at end of file
179 return output_str
180180
=== modified file 'src/sextant/objdump_parser.py' (properties changed: -x to +x)
--- src/sextant/objdump_parser.py 2014-08-18 13:00:53 +0000
+++ src/sextant/objdump_parser.py 2014-10-23 12:33:12 +0000
@@ -1,273 +1,313 @@
1# -----------------------------------------1#!/usr/bin/python
2# Sextant
3# Copyright 2014, Ensoft Ltd.
4# Author: Patrick Stevens
5# -----------------------------------------
6
7#!/usr/bin/python3
8
9import re
10import argparse2import argparse
11import os.path
12import subprocess3import subprocess
13import logging4import logging
145
156"""
16class ParsedObject():7Provide a parser class to extract functions and calls from an objdump file,
17 """8and a way to generate such a file from an object file.
18 Represents a function as parsed from an objdump disassembly.9"""
19 Has a name (which is the verbatim name like '__libc_start_main@plt'),10__all__ = ('Parser', 'run_objdump', 'FileNotFoundError')
20 a position (which is the virtual memory location in hex, like '08048320'11
21 extracted from the dump),12
22 and a canonical_position (which is the virtual memory location in hex13class FileNotFoundError(Exception):
23 but stripped of leading 0s, so it should be a14 """
24 unique id).15 Exception raised when Parser fails to open its file.
25 It also has a list what_do_i_call of ParsedObjects it calls using the16 """
26 assembly keyword 'call'.17 pass
27 It has a list original_code of its assembler code, too, in case it's useful.18
28 """19
2920class Parser(object):
30 @staticmethod21 """
31 def get_canonical_position(position):22 Extract functions and calls from an object file or an objdump output file.
32 return position.lstrip('0')23
3324 Only the specified sections of the disassembled code will be parsed.
34 def __eq__(self, other):25
35 return self.name == other.name26 Attributes:
3627 path:
37 def __init__(self, input_lines=None, assembler_section='', function_name='',28 Set to file_path in __init__.
38 ignore_function_pointers=True, function_pointer_id=None):29 _file:
39 """30 Set to file_object in __init__.
40 Create a new ParsedObject given the definition-lines from objdump -S.31 sections:
41 A sample first definition-line is '08048300 <__gmon_start__@plt>:\n'32 Initialised by taking the sections argument to __init__ and
42 but this method33 and converting it to a set.
43 expects to see the entire definition eg34 ignore_ptrs:
4435 Set to ignore_ptrs in __init__.
45080482f0 <puts@plt>:36
46 80482f0: ff 25 00 a0 04 08 jmp *0x804a00037 section_count:
47 80482f6: 68 00 00 00 00 push $0x038 The number of sections that have been parsed.
48 80482fb: e9 e0 ff ff ff jmp 80482e0 <_init+0x30>39 function_count:
4940 The number of functions that have been parsed.
50 We also might expect assembler_section, which is for instance '.init'41 call_count:
51 in 'Disassembly of section .init:'42 The number of function calls that have been parsed.
52 function_name is used if we want to give this function a custom name.43 function_ptr_count:
53 ignore_function_pointers=True will pretend that calls to (eg) *eax do44 The number of function pointers that have been detected.
54 not exist; setting to False makes us create stubs for those calls.45 _known_stubs:
55 function_pointer_id is only used internally; it refers to labelling46 A set of the names of functions with type 'stub' that have been
56 of function pointers if ignore_function_pointers is False. Each47 parsed - used to avoid registering a stub multiple times.
57 stub is given a unique numeric ID: this parameter tells init where48
58 to start counting these IDs from.49 """
5950 def __init__(self, file_path, file_object=None,
60 """51 sections=None, ignore_ptrs=False,
61 if input_lines is None:52 add_function=None, add_call=None,
62 # get around Python's inability to pass in empty lists by value53 started=None, finished=None):
63 input_lines = []54 """
6455 Initialise the parser object.
65 self.name = function_name or re.search(r'<.+>', input_lines[0]).group(0).strip('<>')56
66 self.what_do_i_call = []57 Raises:
67 self.position = ''58 FileNotFoundError:
6859 If file_object was not provided and file_path couldn't be
69 if input_lines:60 opened.
70 self.position = re.search(r'^[0-9a-f]+', input_lines[0]).group(0)61
71 self.canonical_position = ParsedObject.get_canonical_position(self.position)62 Arguments:
72 self.assembler_section = assembler_section63 file_path:
73 self.original_code = input_lines[1:]64 The path of the objdump output file to parse, or the path of an
65 object file to run objdump on and then parse.
66 file_object:
67 None if file_path is the path to an object file.
68 OR the file object (providing 'for line in file_object')
69 sections:
70 A list of the names of the disassembly sections to parse. An empty
71 list will result in all sections being parsed.
72 ignore_ptrs:
73 If True, calls to function pointers will be ignored during parsing.
74 add_function:
75 A function to call when a function is parsed. Takes:
76 name: name of the parsed function
77 type: type of the parsed function
78 add_call:
79 A function to call when a function call is parsed. Takes:
80 caller: name of the calling function
81 callee: name of the called function
82 started:
83 A function to call when the parse begins. Takes:
84 parser: the Parser instance which has just begun parsing.
85 finished:
86 A function to call when the parse completes. Takes:
87 parser: the Parser instance which has just finished parsing.
88 e.g. if add_function/call have been set to write into files,
89 then finished may be set to properly flush and close them.
90 """
91 self.path = file_path
92 try:
93 self._file = file_object or self._open_file(file_path)
94 except FileNotFoundError:
95 raise
96
97 self.sections = set(sections or [])
98 self.ignore_ptrs = ignore_ptrs
99
100 self.section_count = 0
101 self.function_count = 0
102 self.call_count = 0
103 self.function_ptr_count = 0
104
105 # Avoid adding duplicate function stubs (as these are detected from
106 # function calls so may be repeated).
107 self._known_stubs = set()
108
109 # By default print information to stdout.
110 def print_func(name, typ):
111 print('func {:25}{}'.format(name, typ))
112
113 def print_call(caller, callee):
114 print('call {:25}{:25}'.format(caller, callee))
115
116 def print_started(parser):
117 print('parse started: {}[{}]'.format(self.path, ', '.join(self.sections)))
118
119
120 def print_finished(parser):
121 print('parsed {} functions and {} calls'.format(self.function_count, self.call_count))
122
123 self.add_function = add_function or print_func
124 self.add_call = add_call or print_call
125 self.started = lambda: (started or print_started)(self)
126 self.finished = lambda: (finished or print_finished)(self)
127
128
129 def _get_function_ptr_name(self):
130 """
131 Return a name for a new function pointer.
132 """
133 name = 'func_ptr_{}'.format(self.function_ptr_count)
134 self.function_ptr_count += 1
135 return name
136
137 def _add_function_normal(self, name):
138 """
139 Add a function which we have full assembly code for.
140 """
141 self.add_function(name, 'normal')
142 self.function_count += 1
143
144 def _add_function_ptr(self, name):
145 """
146 Add a function pointer.
147 """
148 self.add_function(name, 'pointer')
149 self.function_count += 1
150
151 def _add_function_stub(self, name):
152 """
153 Add a function stub - we have its name but none of its internals.
154 """
155 if not name in self._known_stubs:
156 self._known_stubs.add(name)
157 self.add_function(name, 'stub')
158 self.function_count += 1
159
160 def _add_call(self, caller, callee):
161 """
162 Add a function call from caller to callee.
163 """
164 self.add_call(caller, callee)
165 self.call_count += 1
166
167 def parse(self):
168 """
169 Parse self._file.
170 """
171 self.started()
172
173 if self._file is not None:
174 in_section = False # if we are in one of self.sections
175 current_function = None # track the caller for function calls
176
177 for line in self._file:
178 if line.startswith('Disassembly'):
179 # 'Disassembly of section <name>:\n'
180 section = line.split(' ')[-1].rstrip(':\n')
181 in_section = section in self.sections if self.sections else True
182 if in_section:
183 self.section_count += 1
184
185 elif in_section:
186 if line.endswith('>:\n'):
187 # '<address> <<function_identifier>>:\n'
188 # with <function_identifier> of form:
189 # <function_name>[@plt]
190 function_identifier = line.split('<')[-1].split('>')[0]
191
192 if '@' in function_identifier:
193 current_function = function_identifier.split('@')[0]
194 self._add_function_stub(current_function)
195 else:
196 current_function = function_identifier
197 self._add_function_normal(current_function)
198
199 elif 'call ' in line or 'callq ' in line:
200 # WHITESPACE to prevent picking up function names
201 # containing 'call'
202
203 # '<hex>: <hex> [l]call [hex] <callee_info>\n'
204 callee_info = line.split(' ')[-1].rstrip('\n')
205
206 # Where <callee_info> is either
207 # 1) '*(<register>)' call to a fn pointer
208 # 2) '$<hex>,$<hex>' lcall to a fn pointer
209 # 3) '<<function_identifier>>' call to a named function
210 if '<' in callee_info and '>' in callee_info:
211 # call to a normal or stub function
212 # '<function_identifier>' is of form <name>[@/-/+]<...>
213 # from which we extract name
214 callee_is_ptr = False
215 function_identifier = callee_info.lstrip('<').rstrip('>\n')
216 if '@' in function_identifier:
217 callee = function_identifier.split('@')[0]
218 self._add_function_stub(callee)
219 else:
220 callee = function_identifier.split('-')[-1].split('+')[0]
221 # Do not add this fn now - it is a normal func
222 # so we know about it from elsewhere.
223
224 else:
225 # Some kind of function pointer call.
226 callee_is_ptr = True
227 if not self.ignore_ptrs:
228 callee = self._get_function_ptr_name()
229 self._add_function_ptr(callee)
230
231 # Add the call.
232 if not (self.ignore_ptrs and callee_is_ptr):
233 self._add_call(current_function, callee)
74 234
75 call_regex_compiled = (ignore_function_pointers and re.compile(r'\tcall. +[^\*]+\n')) or re.compile(r'\tcall. +.+\n')235 self.finished()
76236
77 lines_where_i_call = [line for line in input_lines if call_regex_compiled.search(line)]237 self._file.close()
78238 result = True
79 if not ignore_function_pointers and not function_pointer_id:
80 function_pointer_id = [1]
81
82 for line in lines_where_i_call:
83 # we'll catch call and callq for the moment
84 called = (call_regex_compiled.search(line).group(0))[8:].lstrip(' ').rstrip('\n')
85 if called[0] == '*' and ignore_function_pointers == False:
86 # we have a function pointer, which we'll want to give a distinct name
87 address = '0'
88 name = '_._function_pointer_' + str(function_pointer_id[0])
89 function_pointer_id[0] += 1
90
91 self.what_do_i_call.append((address, name))
92
93 else: # we're not on a function pointer
94 called_split = called.split(' ')
95 if len(called_split) == 2:
96 address, name = called_split
97 name = name.strip('<>')
98 # we still want to remove address offsets like +0x09 from the end of name
99 match = re.match(r'^.+(?=\+0x[a-f0-9]+$)', name)
100 if match is not None:
101 name = match.group(0)
102 self.what_do_i_call.append((address, name.strip('<>')))
103 else: # the format of the "what do i call" is not recognised as a name/address pair
104 self.what_do_i_call.append(tuple(called_split))
105
106 def __str__(self):
107 if self.position:
108 return 'Memory address ' + self.position + ' with name ' + self.name + ' in section ' + str(
109 self.assembler_section)
110 else:239 else:
111 return 'Name ' + self.name240 result = False
112241
113 def __repr__(self):242 return result
114 out_str = 'Disassembly of section ' + self.assembler_section + ':\n\n' + self.position + ' ' + self.name + ':\n'243
115 return out_str + '\n'.join([' ' + line for line in self.original_code])244 def _open_file(self, path):
116245 """
117246 Open and return the file at path.
118class Parser:247
119 # Class to manipulate the output of objdump248 Raises:
120249 FileNotFoundError:
121 def __init__(self, input_file_location='', file_contents=None, sections_to_view=None, ignore_function_pointers=False):250 If the file fails to open.
122 """Creates a new Parser, given an input file path. That path should be an output from objdump -D.251
123 Alternatively, supply file_contents, as a list of each line of the objdump output. We expect newlines252 Arguments:
124 to have been stripped from the end of each of these lines.253 path:
125 sections_to_view makes sure we only use the specified sections (use [] for 'all sections' and None for none).254 The path of the file to open.
126 """255 """
127 if file_contents is None:
128 file_contents = []
129
130 if sections_to_view is None:
131 sections_to_view = []
132
133 if input_file_location:
134 file_to_read = open(input_file_location, 'r')
135 self.source_string_list = [line for line in file_to_read]
136 file_to_read.close()
137 elif file_contents:
138 self.source_string_list = [string + '\n' for string in file_contents]
139 self.parsed_objects = []
140 self.sections_to_view = sections_to_view
141 self.ignore_function_pointers = ignore_function_pointers
142 self.pointer_identifier = [1]
143
144 def create_objects(self):
145 """ Go through the source_string_list, getting object names (like 'main') along with the corresponding
146 definitions, and put them into parsed_objects """
147 if self.sections_to_view is None:
148 return
149
150 is_in_section = lambda name: self.sections_to_view == [] or name in self.sections_to_view
151
152 parsed_objects = []
153 current_object = []
154 current_section = ''
155 regex_compiled_addr_and_name = re.compile(r'[0-9a-f]+ <.+>:\n')
156 regex_compiled_section = re.compile(r'section .+:\n')
157
158 for line in self.source_string_list[4:]: # we bodge, since the file starts with a little bit of guff
159 if regex_compiled_addr_and_name.match(line):
160 # we are a starting line
161 current_object = [line]
162 elif re.match(r'Disassembly of section', line):
163 current_section = regex_compiled_section.search(line).group(0).lstrip('section ').rstrip(':\n')
164 current_object = []
165 elif line == '\n':
166 # we now need to stop parsing the current block, and store it
167 if len(current_object) > 0 and is_in_section(current_section):
168 parsed_objects.append(ParsedObject(input_lines=current_object, assembler_section=current_section,
169 ignore_function_pointers=self.ignore_function_pointers,
170 function_pointer_id=self.pointer_identifier))
171 else:
172 current_object.append(line)
173
174 # now we should be done. We assumed that blocks begin with r'[0-9a-f]+ <.+>:\n' and end with a newline.
175 # clear duplicates:
176
177 self.parsed_objects = []
178 for obj in parsed_objects:
179 if obj not in self.parsed_objects: # this is so that if we jump into the function at an offset,
180 # we still register it as being the old function, not some new function at a different address
181 # with the same name
182 self.parsed_objects.append(obj)
183
184 # by this point, each object contains a self.what_do_i_call which is a list of tuples
185 # ('address', 'name') if the address and name were recognised, or else (thing1, thing2, ...)
186 # where the instruction was call thing1 thing2 thing3... .
187
188 def object_lookup(self, object_name='', object_address=''):
189 """Returns the object with name object_name or address object_address (at least one must be given).
190 If objects with the given name or address
191 are not found, returns None."""
192
193 if object_name == '' and object_address == '':
194 return None
195
196 trial_obj = self.parsed_objects
197
198 if object_name != '':
199 trial_obj = [obj for obj in trial_obj if obj.name == object_name]
200
201 if object_address != '':
202 trial_obj = [obj for obj in trial_obj if
203 obj.canonical_position == ParsedObject.get_canonical_position(object_address)]
204
205 if len(trial_obj) == 0:
206 return None
207
208 return trial_obj
209
210def get_parsed_objects(filepath, sections_to_view, not_object_file, readable=False, ignore_function_pointers=False):
211 if sections_to_view is None:
212 sections_to_view = [] # because we use None for "no sections"; the intent of not providing any sections
213 # on the command line was to look at all sections, not none
214
215 # first, check whether the given file exists
216 if not os.path.isfile(filepath):
217 # we'd like to use FileNotFoundError, but we might be running under
218 # Python 2, which doesn't have it.
219 raise IOError(filepath + 'is not found.')
220
221 #now the file should exist
222 if not not_object_file: #if it is something we need to run through objdump first
223 #we need first to run the object file through objdump
224
225 objdump_file_contents = subprocess.check_output(['objdump', '-D', filepath])
226 objdump_str = objdump_file_contents.decode('utf-8')
227
228 p = Parser(file_contents=objdump_str.split('\n'), sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)
229 else:
230 try:256 try:
231 p = Parser(input_file_location=filepath, sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)257 result = open(path)
232 except UnicodeDecodeError:258 except Exception as e:
233 logging.error('File could not be parsed as a string. Did you mean to supply --object-file?')259 raise FileNotFoundError("parser failed to open `{}`: {}".format(path, e.strerror))
234 return False260
235261 return result
236 if readable: # if we're being called from the command line262
237 print('File read; beginning parse.')263
238 #file is now read, and we start parsing264def run_objdump(input_file):
239265 """
240 p.create_objects()266 Run the objdump command on the file with the given path.
241 return p.parsed_objects267
268 Return the input file path and a file object representing the result of
269 the objdump.
270
271 Arguments:
272 input_file:
273 The path of the file to run objdump on.
274
275 """
276 # A single section can be specified for parsing with the -j flag,
277 # but it is not obviously possible to parse multiple sections like this.
278 p = subprocess.Popen(['objdump', '-d', input_file, '--no-show-raw-insn'],
279 stdout=subprocess.PIPE)
280 g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$'], stdin=p.stdout, stdout=subprocess.PIPE)
281 return input_file, g.stdout
282
242283
243def main():284def main():
244 argumentparser = argparse.ArgumentParser(description="Parse the output of objdump.")285 """
245 argumentparser.add_argument('--filepath', metavar="FILEPATH", help="path to input file", type=str, nargs=1)286 Run the parser from the command line.
246 argumentparser.add_argument('--not-object-file', help="import text objdump output instead of the compiled file", default=False,287
247 action='store_true')288 The path of the target file, the sections to view and the ignore function
248 argumentparser.add_argument('--sections-to-view', metavar="SECTIONS",289 pointers flag are set with command line arguments.
249 help="sections of disassembly to view, like '.text'; leave blank for 'all'",290 """
250 type=str, nargs='*')291 ap = argparse.ArgumentParser(description="Parse the output of objdump.")
251 argumentparser.add_argument('--ignore-function-pointers', help='whether to skip parsing calls to function pointers', action='store_true', default=False)292 ap.add_argument('--filepath', metavar="FILEPATH",
252293 help="path to input file", type=str, nargs=1)
253 parsed = argumentparser.parse_args()294
295 ap.add_argument('--sections-to-view', metavar="SECTIONS",
296 help="disassembly sections to view, eg '.text'; leave blank for 'all'",
297 type=str, nargs='*')
298 ap.add_argument('--ignore-function-pointers',
299 help='skip parsing calls to function pointers',
300 action='store_true', default=False)
301
302 args = ap.parse_args()
254 303
255 filepath = parsed.filepath[0]304 filepath = args.filepath[0]
256 sections_to_view = parsed.sections_to_view305 sections = args.sections_to_view
257 not_object_file = parsed.not_object_file306 ignore_ptrs = args.ignore_function_pointers
258 readable = True307
259 function_pointers = parsed.ignore_function_pointers308 parser = Parser(filepath, sections, ignore_ptrs)
260309 parser.parse()
261 parsed_objs = get_parsed_objects(filepath, sections_to_view, not_object_file, readable, function_pointers)310
262 if parsed_objs is False:311
263 return 1312if __name__ == '__main__':
264
265 if readable:
266 for named_function in parsed_objs:
267 print(named_function.name)
268 print([f[-1] for f in named_function.what_do_i_call]) # use [-1] to get the last element, since:
269 #either we are in ('address', 'name'), when we want the last element, or else we are in (thing1, thing2, ...)
270 #so for the sake of argument we'll take the last thing
271
272if __name__ == "__main__":
273 main()313 main()
274314
=== modified file 'src/sextant/query.py'
--- src/sextant/query.py 2014-08-26 16:33:20 +0000
+++ src/sextant/query.py 2014-10-23 12:33:12 +0000
@@ -14,7 +14,7 @@
14from .export import ProgramConverter14from .export import ProgramConverter
1515
1616
17def query(remote_neo4j, input_query, display_neo4j='', program_name=None,17def query(connection, display_neo4j='', program_name=None,
18 argument_1=None, argument_2=None, suppress_common=False):18 argument_1=None, argument_2=None, suppress_common=False):
19 """19 """
20 Run a query against the database at remote_neo4j.20 Run a query against the database at remote_neo4j.
@@ -36,24 +36,24 @@
3636
37 """37 """
3838
39 if display_neo4j:39 # if display_neo4j:
40 display_url = display_neo4j40 # display_url = display_neo4j
41 else:41 # else:
42 display_url = remote_neo4j42 # display_url = remote_neo4j
4343
44 try:44 # try:
45 db = db_api.SextantConnection(remote_neo4j)45 # db = db_api.SextantConnection(remote_neo4j)
46 except requests.exceptions.ConnectionError as err:46 # except requests.exceptions.ConnectionError as err:
47 logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))47 # logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))
48 logging.error(str(err))48 # logging.error(str(err))
49 return 249 # return 2
50 #Not supported in python 250 # #Not supported in python 2
51 #except (urllib.exceptions.MaxRetryError):51 # #except (urllib.exceptions.MaxRetryError):
52 # logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))52 # # logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))
53 # return 253 # # return 2
54 except Exception as err:54 # except Exception as err:
55 logging.exception(str(err))55 # logging.exception(str(err))
56 return 256 # return 2
5757
58 prog = None58 prog = None
59 names_list = None59 names_list = None
@@ -66,38 +66,38 @@
66 if argument_1 is None:66 if argument_1 is None:
67 print('Supply one function name to functions-calling.')67 print('Supply one function name to functions-calling.')
68 return 168 return 1
69 prog = db.get_all_functions_calling(program_name, argument_1)69 prog = connection.get_all_functions_calling(program_name, argument_1)
70 elif input_query == 'functions-called-by':70 elif input_query == 'functions-called-by':
71 if argument_1 is None:71 if argument_1 is None:
72 print('Supply one function name to functions-called-by.')72 print('Supply one function name to functions-called-by.')
73 return 173 return 1
74 prog = db.get_all_functions_called(program_name, argument_1)74 prog = connection.get_all_functions_called(program_name, argument_1)
75 elif input_query == 'all-call-paths':75 elif input_query == 'all-call-paths':
76 if argument_1 is None and argument_2 is None:76 if argument_1 is None and argument_2 is None:
77 print('Supply two function names to calls-between.')77 print('Supply two function names to calls-between.')
78 return 178 return 1
79 prog = db.get_call_paths(program_name, argument_1, argument_2)79 prog = connection.get_call_paths(program_name, argument_1, argument_2)
80 elif input_query == 'whole-program':80 elif input_query == 'whole-program':
81 prog = db.get_whole_program(program_name)81 prog = connection.get_whole_program(program_name)
82 elif input_query == 'shortest-call-path':82 elif input_query == 'shortest-call-path':
83 if argument_1 is None and argument_2 is None:83 if argument_1 is None and argument_2 is None:
84 print('Supply two function names to shortest-path.')84 print('Supply two function names to shortest-path.')
85 return 185 return 1
86 prog = db.get_shortest_path_between_functions(program_name, argument_1, argument_2)86 prog = connection.get_shortest_path_between_functions(program_name, argument_1, argument_2)
87 elif input_query == 'functions':87 elif input_query == 'functions':
88 if program_name is not None:88 if program_name is not None:
89 func_names = db.get_function_names(program_name)89 func_names = connection.get_function_names(program_name)
90 if func_names:90 if func_names:
91 names_list = list(func_names)91 names_list = list(func_names)
92 else:92 else:
93 print('No functions were found in program %s on server %s.' % (program_name, display_url))93 print('No functions were found in program %s on server %s.' % (program_name, display_url))
94 else:94 else:
95 list_of_programs = db.get_program_names()95 list_of_programs = connection.get_program_names()
96 if not list_of_programs:96 if not list_of_programs:
97 print('Server %s database empty.' % (display_url))97 print('Server %s database empty.' % (display_url))
98 return 098 return 0
9999
100 func_list = [db.get_function_names(prog_name)100 func_list = [connection.get_function_names(prog_name)
101 for prog_name in list_of_programs]101 for prog_name in list_of_programs]
102102
103 if not func_list:103 if not func_list:
@@ -105,7 +105,7 @@
105 else:105 else:
106 names_list = func_list106 names_list = func_list
107 elif input_query == 'programs':107 elif input_query == 'programs':
108 list_found = list(db.get_program_names())108 list_found = list(connection.get_program_names())
109 if not list_found:109 if not list_found:
110 print('No programs were found on server {}.'.format(display_url))110 print('No programs were found on server {}.'.format(display_url))
111 else:111 else:
@@ -122,7 +122,5 @@
122 print('Nothing was returned from the query.')122 print('Nothing was returned from the query.')
123123
124124
125def audit(remote_neo4j):125def audit(connection):
126 db = db_api.SextantConnection(remote_neo4j)126 return connection.programs_with_metadata()
127
128 return db.programs_with_metadata()
129127
=== added file 'src/sextant/sshmanager.py'
--- src/sextant/sshmanager.py 1970-01-01 00:00:00 +0000
+++ src/sextant/sshmanager.py 2014-10-23 12:33:12 +0000
@@ -0,0 +1,278 @@
"""Provide a class to manage an SSH tunnel and controller"""
# NOTE: the docstring above must stay the first statement of the module,
# otherwise Python does not bind it to __doc__.
import os
import getpass
import logging
import subprocess

__all__ = ('SSHConnectionError', 'SSHCommandError', 'SSHManager')

# Base path of the temporary directory to create on the REMOTE machine; the
# name of the ssh user is appended to it to give a per-user directory.
# Temporary files will be scp'd there prior to upload to the neo4j database.
TMP_DIR = '/tmp/sextant'
12
13
class SSHConnectionError(Exception):
    """Raised when an attempt to establish an ssh connection fails."""
19
20
class SSHCommandError(Exception):
    """Raised when an attempt to run a command over ssh fails."""
26
27
class SSHManager(object):
    """
    Manage an ssh tunnel with port forwarding.

    The tunnel is started as an ssh control master; every later command is
    sent through its control socket so that it reuses the connection.

    Attributes:
        local_port:
            The port number on the local machine to forward.
        remote_host:
            The host to ssh into.
        remote_port:
            The port number on the remote host to connect to.
        ssh_user:
            The username to use for sshing - defaults to None, in which case
            the ssh connection uses the username of the user who ran sextant.

        _tmp_dir:
            The per-user temporary directory on the remote machine: TMP_DIR
            with the ssh user name (or the local user name) appended.
        _controller_name:
            The identifying name for the ssh control socket - the base name
            'sextantcontroller' combined with the local port.
        _is_localhost:
            True if we are trying to ssh into localhost. In this case do not
            open the tunnel, just provide the right api so the rest of Sextant
            need not special case.
    """

    def __init__(self, local_port, remote_host, remote_port,
                 ssh_user=None, is_localhost=False):
        """
        Open an SSH tunnel with multiplexing enabled.

        Raises:
            ValueError:
                If local_port or remote_port are not positive integers
            SSHConnectionError:
                If the ssh command that opens the tunnel fails.

        Arguments:
            local_port:
                The number of the local port to forward.
            remote_host:
                The name of the remote host to connect to.
            remote_port:
                The port number on the remote host to connect to.
            ssh_user:
                An alternative user name to use for the ssh login.
            is_localhost:
                True if we are trying to ssh into localhost.
        """
        if not (isinstance(local_port, int) and local_port > 0):
            raise ValueError(
                'Local port {} must be a positive integer.'.format(local_port))
        if not (isinstance(remote_port, int) and remote_port > 0):
            raise ValueError(
                'Remote port {} must be a positive integer.'.format(remote_port))

        self.local_port = local_port
        self.remote_host = remote_host
        self.remote_port = remote_port
        self.ssh_user = ssh_user

        self._tmp_dir = '{}-{}'.format(TMP_DIR, self.ssh_user or getpass.getuser())

        self._controller_name = 'sextantcontroller{}'.format(local_port)
        self._is_localhost = is_localhost

        self._open()

    def _open(self):
        """
        Helper function to open the SSH tunnel.

        Does nothing when connecting to localhost. Otherwise starts the ssh
        control master and creates the remote temporary directory.

        Raises:
            SSHConnectionError:
                If the ssh command failed to run.
        """
        if self._is_localhost:
            return

        # The command is built as an argument list, so no shell is involved.
        cmd = ['ssh']

        if self.ssh_user:
            # ssh -l {user} ... sets the remote login username
            cmd.extend(['-l', self.ssh_user])

        # -L localport:localhost:remoteport forwards the port.
        port_fwd = '{}:localhost:{}'.format(self.local_port, self.remote_port)

        # -M makes SSH able to accept slave connections.
        # -S sets the location of a control socket (in this case,
        #    sextantcontroller with the local port appended, just in case we
        #    run sextant twice simultaneously), so we know how to close the
        #    port again.
        # -f goes into background; -N does not execute a remote command;
        # -T says to remote host that we don't want a text shell.
        cmd.extend(['-M', '-S', self._controller_name, '-fNT',
                    '-L', port_fwd, self.remote_host])

        logging.debug('Opening SSH tunnel with cmd: {}'.format(' '.join(cmd)))

        rc = subprocess.call(cmd)
        if rc:
            raise SSHConnectionError('SSH setup failed with error {}'.format(rc))

        logging.debug('SSH tunnel created')

        self._make_tmp_dir()

    def close(self):
        """
        Close the SSH tunnel after cleaning the temp directory.

        Failures are logged rather than raised. Does nothing for localhost.
        """
        if self._is_localhost:
            return

        # Clean the temporary directory we created earlier. This has to
        # happen BEFORE the control master exits, otherwise the rm would no
        # longer be able to reuse the multiplexed connection.
        self._delete_tmp_dir()

        # ssh -O sends a command to the slave specified in -S, -q for quiet.
        cmd = ['ssh', '-S', self._controller_name,
               '-O', 'exit', '-q', self.remote_host]

        logging.debug('Shutting down SSH tunnel with cmd: `{}`'
                      .format(' '.join(cmd)))

        # SSH has a bug on some systems which causes it to ignore the -q flag
        # meaning it prints "Exit request sent." to stderr.
        # To avoid this, we grab stderr temporarily, and see if it's that
        # string; if it is, suppress it.
        # universal_newlines=True makes stderr text on Python 3 as well, so
        # the comparison with the str literal below is meaningful.
        pr = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, universal_newlines=True)
        _, stderr = pr.communicate()
        stderr = stderr.strip()
        # Empty stderr is the normal, quiet case - only report real output.
        if stderr and stderr != 'Exit request sent.':
            logging.error('SSH shutdown stderr: {}'.format(stderr))

        if pr.returncode == 0:
            logging.debug('Shut down successfully')
        else:
            logging.error('SSH shutdown failed with code {}'
                          .format(pr.returncode))

    def _call(self, *args):
        """
        Execute a command on the remote machine over SSH.

        Return a tuple of rc, stdout, stderr from the process call. For
        localhost nothing is run and (1, None, <message>) is returned.
        A non-zero return code is logged at debug level, not raised.

        Arguments:
            *args:
                Strings containing the individual words of the command to
                execute. E.g. _call('ls', '-lh', '.').
        """
        if self._is_localhost:
            return (1, None, 'Cannot call SSH command from localhost')

        # -S reuses the control socket opened in _open.
        ssh_cmd = ['ssh', '-S', self._controller_name, self.remote_host]
        ssh_cmd.extend(args)
        p = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()

        if p.returncode:
            logging.debug('Call to `{}` failed with code: {}, stderr: {}'
                          .format(' '.join(ssh_cmd), p.returncode, stderr))

        return p.returncode, stdout, stderr

    def _make_tmp_dir(self):
        """
        Create the per-user temporary directory on the remote machine.
        """
        self._call('mkdir', '-p', self._tmp_dir)

    def _delete_tmp_dir(self):
        """
        Remove the temporary directory on the remote machine.
        """
        self._call('rm', '-r', self._tmp_dir)

    def send_to_tmp_dir(self, path_list):
        """
        Send the specified files to the temporary directory on the remote machine.

        Return an iterator of save paths on the remote machine. For localhost
        nothing is copied and path_list itself is returned.

        Raises:
            ValueError:
                If no file paths were provided, or if one or more of the
                provided paths is not an actual file.
            SSHCommandError:
                If the scp command failed for any reason.

        Arguments:
            path_list:
                Iterator of paths to the files on the local machine. All files
                will be checked before copying to ensure that they exist and
                to prevent passing arbitrary arguments to the scp command.
        """
        if not path_list:
            raise ValueError('attempt to copy zero files')

        # If we are in localhost, we are not controlling the TMP_DIR,
        # so the files are already there.
        if self._is_localhost:
            return path_list

        # Make sure we can iterate over path_list more than once.
        path_list = list(path_list)

        # An empty generator is truthy, so it passes the check above; test
        # again now that the paths are in a list.
        if not path_list:
            raise ValueError('attempt to copy zero files')

        # Check that actual files are being copied - not random strings.
        missed = [f for f in path_list if not os.path.isfile(f)]
        if missed:
            raise ValueError('Attempted to copy non existant files: {}'
                             .format(', '.join(missed)))

        # Log in as the same user as the tunnel, whose name is also part of
        # the temporary directory's name.
        if self.ssh_user:
            target = '{}@{}:{}'.format(self.ssh_user, self.remote_host,
                                       self._tmp_dir)
        else:
            target = '{}:{}'.format(self.remote_host, self._tmp_dir)

        # ControlPath lets scp reuse the already authenticated master
        # connection instead of opening a new one.
        scp_cmd = ['scp', '-o', 'ControlPath={}'.format(self._controller_name)]
        scp_cmd.extend(path_list)
        scp_cmd.append(target)

        proc = subprocess.Popen(scp_cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        # communicate() drains both pipes; wait() alone can block forever
        # once a pipe buffer fills up.
        _, stderr = proc.communicate()
        if proc.returncode:
            raise SSHCommandError('scp failed with code {}: {}'
                                  .format(proc.returncode, stderr))

        return (os.path.join(self._tmp_dir, os.path.basename(f))
                for f in path_list)

    def remove_from_tmp_dir(self, path_list):
        """
        Delete the files specified as arguments from the remote machine.

        The output of send_to_tmp_dir may be passed as input to this function.
        A failing rm is not raised; _call only logs it at debug level.
        Does nothing for localhost or when path_list is empty.

        Arguments:
            path_list:
                Iterator of paths of the files on the remote machine. Only the
                base name of each path is used, so remove_from_tmp_dir(['foo'])
                will delete the file self._tmp_dir/foo
        """
        if self._is_localhost:
            return

        # Assume we can trust this file list.
        paths = [os.path.join(self._tmp_dir, os.path.basename(f))
                 for f in path_list]
        if not paths:
            # rm without operands is an error - there is nothing to do.
            return
        self._call('rm', *paths)
274
275
276
277
278
0279
=== added file 'src/sextant/test_all.sh'
--- src/sextant/test_all.sh 1970-01-01 00:00:00 +0000
+++ src/sextant/test_all.sh 2014-10-23 12:33:12 +0000
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
#
# Run every unit test module (test_*.py) found from the current directory.

# Export the variable: a plain assignment is not passed on to the python
# child process unless PYTHONPATH was already exported by the caller.
export PYTHONPATH="$PYTHONPATH:$HOME/."

# Quote the pattern so that the shell never tries to expand the glob itself.
python -m unittest discover --pattern='test_*.py'
05
=== added file 'src/sextant/test_csvwriter.py'
--- src/sextant/test_csvwriter.py 1970-01-01 00:00:00 +0000
+++ src/sextant/test_csvwriter.py 2014-10-23 12:33:12 +0000
@@ -0,0 +1,89 @@
1#!/usr/bin/python
2import unittest
3from csvwriter import CSVWriter
4import subprocess
5from os import listdir
6
class TestSequence(unittest.TestCase):
    """Tests for CSVWriter: header line, row validation and file splitting."""

    def get_writer(self, path='tmp_test', headers=None, split=100):
        """
        Return a CSVWriter writing to files named after `path`.

        `headers` defaults to ['name', 'type']; a fresh list is built on every
        call so that no list object is shared between tests.
        """
        if headers is None:
            headers = ['name', 'type']
        return CSVWriter(path, headers, split)

    def tearDown(self):
        # Remove every csv file the test left in the working directory.
        to_rm = [f for f in listdir('.')
                 if f.startswith('tmp_test') and f.endswith('.csv')]
        if to_rm:
            rc = subprocess.call(['rm'] + to_rm)
            if rc:
                msg = 'failed to clean'
            else:
                msg = 'cleaned'
            print('{} {} files {}'.format(msg, len(to_rm), to_rm))

    def test_headers(self):
        # check that headers are being written correctly
        headers = ['some', 'headers', 'to', 'check']
        writer = self.get_writer(headers=headers)
        writer.finish()

        expected_path = 'tmp_test0.csv'
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        self.assertEqual(next(writer.file_iter()), expected_path)

        # The with block closes the file even if an assertion fails.
        with open(expected_path, 'r') as writer_file:
            self.assertEqual(writer_file.readline(), 'some,headers,to,check\n')
            # check that nothing extra is written
            self.assertFalse(writer_file.readline())

    def test_writing(self):
        # check that csv entries are written correctly, and errors
        # appropriately raised for invalid input
        writer = self.get_writer()

        self.assertRaises(ValueError, writer.write, 'too short')
        self.assertRaises(ValueError, writer.write, 'slightly', 'too', 'long')
        writer.write('just', 'write')

        writer.finish()

        with open(next(writer.file_iter()), 'r') as writer_file:
            self.assertEqual(writer_file.readline(), 'name,type\n')
            self.assertEqual(writer_file.readline(), 'just,write\n')
            self.assertFalse(writer_file.readline())

    def test_split(self):
        # check that output is split into files of `split` lines each
        split = 10
        files = 10
        writer = self.get_writer(split=split)

        # split-1 to account for header line
        for _ in range(files * (split - 1)):
            writer.write('an', 'entry')

        writer.finish()

        gen_count = sum(1 for f in writer.file_iter())
        self.assertEqual(gen_count, files,
                         'generated {} files, expected {}'
                         .format(gen_count, files))

        header_expected = 'name,type\n'
        for f in writer.file_iter():
            # The files are only read here, so plain 'r' mode is enough.
            with open(f, 'r') as wf:
                lines = wf.readlines()

            # check headers
            header_line = lines[0] if lines else ''
            self.assertEqual(header_line, header_expected,
                             '{} contained header {}, expected {}'
                             .format(f, header_line, header_expected))

            # check line count
            line_count = len(lines)
            self.assertEqual(line_count, split,
                             '{} contained {} lines, expected {}'
                             .format(f, line_count, split))
84
85
86if __name__ == '__main__':
87 unittest.main()
88
89
090
=== renamed file 'src/sextant/tests.py' => 'src/sextant/test_db_api.py' (properties changed: -x to +x)
--- src/sextant/tests.py 2014-08-14 15:23:39 +0000
+++ src/sextant/test_db_api.py 2014-10-23 12:33:12 +0000
@@ -1,3 +1,4 @@
1#!/usr/bin/python
1# -----------------------------------------2# -----------------------------------------
2# Sextant3# Sextant
3# Copyright 2014, Ensoft Ltd.4# Copyright 2014, Ensoft Ltd.
@@ -10,56 +11,69 @@
10from db_api import Function11from db_api import Function
11from db_api import FunctionQueryResult12from db_api import FunctionQueryResult
12from db_api import SextantConnection13from db_api import SextantConnection
13from db_api import Validator14from db_api import validate_query
1415
1516
16class TestFunctionQueryResults(unittest.TestCase):17class TestFunctionQueryResults(unittest.TestCase):
17 def setUp(self):18 @classmethod
19 def setUpClass(cls):
18 # we need to set up the remote database by using the neo4j_input_api20 # we need to set up the remote database by using the neo4j_input_api
19 self.remote_url = 'http://ensoft-sandbox:7474'21 cls.remote_url = 'http://ensoft-sandbox:7474'
2022
21 self.setter_connection = SextantConnection(self.remote_url)23 cls.setter_connection = SextantConnection('ensoft-sandbox', 7474)
22 self.program_1_name = 'testprogram'24
23 self.upload_program = self.setter_connection.new_program(self.program_1_name)25 cls.program_1_name = 'testprogram'
24 self.upload_program.add_function('func1')26 cls.one_node_program_name = 'testprogram1'
25 self.upload_program.add_function('func2')27 cls.empty_program_name = 'testprogramblank'
26 self.upload_program.add_function('func3')28
27 self.upload_program.add_function('func4')29 # if anything failed before, delete programs now
28 self.upload_program.add_function('func5')30 cls.setter_connection.delete_program(cls.program_1_name)
29 self.upload_program.add_function('func6')31 cls.setter_connection.delete_program(cls.one_node_program_name)
30 self.upload_program.add_function('func7')32 cls.setter_connection.delete_program(cls.empty_program_name)
31 self.upload_program.add_function_call('func1', 'func2')33
32 self.upload_program.add_function_call('func1', 'func4')34
33 self.upload_program.add_function_call('func2', 'func1')35 cls.upload_program = cls.setter_connection.new_program(cls.program_1_name)
34 self.upload_program.add_function_call('func2', 'func4')36 cls.upload_program.add_function('func1')
35 self.upload_program.add_function_call('func3', 'func5')37 cls.upload_program.add_function('func2')
36 self.upload_program.add_function_call('func4', 'func4')38 cls.upload_program.add_function('func3')
37 self.upload_program.add_function_call('func4', 'func5')39 cls.upload_program.add_function('func4')
38 self.upload_program.add_function_call('func5', 'func1')40 cls.upload_program.add_function('func5')
39 self.upload_program.add_function_call('func5', 'func2')41 cls.upload_program.add_function('func6')
40 self.upload_program.add_function_call('func5', 'func3')42 cls.upload_program.add_function('func7')
41 self.upload_program.add_function_call('func6', 'func7')43 cls.upload_program.add_call('func1', 'func2')
4244 cls.upload_program.add_call('func1', 'func4')
43 self.upload_program.commit()45 cls.upload_program.add_call('func2', 'func1')
4446 cls.upload_program.add_call('func2', 'func4')
45 self.one_node_program_name = 'testprogram1'47 cls.upload_program.add_call('func3', 'func5')
46 self.upload_one_node_program = self.setter_connection.new_program(self.one_node_program_name)48 cls.upload_program.add_call('func4', 'func4')
47 self.upload_one_node_program.add_function('lonefunc')49 cls.upload_program.add_call('func4', 'func5')
4850 cls.upload_program.add_call('func5', 'func1')
49 self.upload_one_node_program.commit()51 cls.upload_program.add_call('func5', 'func2')
52 cls.upload_program.add_call('func5', 'func3')
53 cls.upload_program.add_call('func6', 'func7')
54
55 cls.upload_program.commit()
56
57 cls.upload_one_node_program = cls.setter_connection.new_program(cls.one_node_program_name)
58 cls.upload_one_node_program.add_function('lonefunc')
59
60 cls.upload_one_node_program.commit()
50 61
51 self.empty_program_name = 'testprogramblank'62 cls.upload_empty_program = cls.setter_connection.new_program(cls.empty_program_name)
52 self.upload_empty_program = self.setter_connection.new_program(self.empty_program_name)63
5364 cls.upload_empty_program.commit()
54 self.upload_empty_program.commit()65
5566 cls.getter_connection = cls.setter_connection
56 self.getter_connection = SextantConnection(self.remote_url)67
5768
58 def tearDown(self):69 @classmethod
59 self.setter_connection.delete_program(self.upload_program.program_name)70 def tearDownClass(cls):
60 self.setter_connection.delete_program(self.upload_one_node_program.program_name)71 cls.setter_connection.delete_program(cls.upload_program.program_name)
61 self.setter_connection.delete_program(self.upload_empty_program.program_name)72 cls.setter_connection.delete_program(cls.upload_one_node_program.program_name)
62 del(self.setter_connection)73 cls.setter_connection.delete_program(cls.upload_empty_program.program_name)
74
75 cls.setter_connection.close()
76 del(cls.setter_connection)
6377
64 def test_17_get_call_paths(self):78 def test_17_get_call_paths(self):
65 reference1 = FunctionQueryResult(parent_db=None, program_name=self.program_1_name)79 reference1 = FunctionQueryResult(parent_db=None, program_name=self.program_1_name)
@@ -134,7 +148,7 @@
134148
135 def test_08_get_program_names(self):149 def test_08_get_program_names(self):
136 reference = {self.program_1_name, self.one_node_program_name, self.empty_program_name}150 reference = {self.program_1_name, self.one_node_program_name, self.empty_program_name}
137 self.assertEqual(reference, self.getter_connection.get_program_names())151 self.assertTrue(reference.issubset(self.getter_connection.get_program_names()))
138152
139153
140 def test_11_get_all_functions_called(self):154 def test_11_get_all_functions_called(self):
@@ -249,13 +263,13 @@
249 self.assertIsNone(self.getter_connection.get_call_paths(self.one_node_program_name, 'notafunc', 'notafunc'))263 self.assertIsNone(self.getter_connection.get_call_paths(self.one_node_program_name, 'notafunc', 'notafunc'))
250264
251 def test_10_validator(self):265 def test_10_validator(self):
252 self.assertFalse(Validator.validate(''))266 self.assertFalse(validate_query(''))
253 self.assertTrue(Validator.validate('thisworks'))267 self.assertTrue(validate_query('thisworks'))
254 self.assertTrue(Validator.validate('th1sw0rks'))268 self.assertTrue(validate_query('th1sw0rks'))
255 self.assertTrue(Validator.validate('12345'))269 self.assertTrue(validate_query('12345'))
256 self.assertFalse(Validator.validate('this does not work'))270 self.assertFalse(validate_query('this does not work'))
257 self.assertTrue(Validator.validate('this_does_work'))271 self.assertTrue(validate_query('this_does_work'))
258 self.assertFalse(Validator.validate("'")) # string consisting of a single quote mark272 self.assertFalse(validate_query("'")) # string consisting of a single quote mark
259273
260if __name__ == '__main__':274if __name__ == '__main__':
261 unittest.main()
262\ No newline at end of file275\ No newline at end of file
276 unittest.main()
263277
=== added file 'src/sextant/test_parser.py'
--- src/sextant/test_parser.py 1970-01-01 00:00:00 +0000
+++ src/sextant/test_parser.py 2014-10-23 12:33:12 +0000
@@ -0,0 +1,85 @@
1#!/usr/bin/python
2from collections import defaultdict
3import unittest
4import subprocess
5
6import objdump_parser as parser
7
8DUMP_FILE = 'test_resources/parser_test.dump'
9
class TestSequence(unittest.TestCase):
    """Tests for objdump_parser.Parser, run against the canned DUMP_FILE."""

    def add_function(self, dct, name, typ):
        """Record function `name` with type `typ`, failing on a duplicate."""
        self.assertNotIn(name, dct,
                         "duplicate function added: {} into {}"
                         .format(name, dct.keys()))
        dct[name] = typ

    def add_call(self, dct, caller, callee):
        """Record that `caller` calls `callee`; repeated calls are kept."""
        dct[caller].append(callee)

    def do_parse(self, path=DUMP_FILE, sections=None, ignore_ptrs=False):
        """
        Parse `path` and collect the parser's output in local containers.

        `sections` defaults to ['.text']; a fresh list is built on every call
        so that no list object is shared between tests.

        Return a tuple (result of parse(), {function name: type},
        {caller: [callees]}).
        """
        if sections is None:
            sections = ['.text']

        functions = {}
        calls = defaultdict(list)

        # set the Parser to put output in local dictionaries
        add_function = lambda n, t: self.add_function(functions, n, t)
        add_call = lambda a, b: self.add_call(calls, a, b)

        p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs,
                          add_function=add_function, add_call=add_call)
        res = p.parse()

        # NOTE(review): these assignments set attributes on the parser
        # MODULE, not on the Parser instance - presumably a leftover from
        # module-level callbacks; confirm before removing.
        parser.add_function = None
        parser.add_call = None

        return res, functions, calls

    def test_open(self):
        self.assertRaises(parser.FileNotFoundError, parser.Parser,
                          file_path='rubbish file')

    def test_functions(self):
        # ensure that the correct functions are listed with the correct types
        res, funcs, calls = self.do_parse()

        expected = [('normal', 'normal'),
                    ('duplicates', 'normal'),
                    ('wierd$name', 'normal'),
                    ('printf', 'stub'),
                    ('func_ptr_3', 'pointer')]
        for name, typ in expected:
            self.assertIn(name, funcs,
                          "'{}' not found in function dictionary".format(name))
            self.assertEqual(funcs[name], typ)

        self.assertNotIn('__gmon_start__', funcs,
                         "don't see a function defined in .plt")

    def test_no_ptrs(self):
        # ensure that the ignore_ptrs flags is working
        res, funcs, calls = self.do_parse(ignore_ptrs=True)

        self.assertNotIn('pointer', funcs.values())
        self.assertEqual(len(calls['normal']), 2)

    def test_calls(self):
        res, funcs, calls = self.do_parse()

        self.assertIn('normal', calls['main'])
        self.assertIn('duplicates', calls['main'])

        normal_calls = sorted(['wierd$name', 'printf', 'func_ptr_3'])
        self.assertEqual(sorted(calls['normal']), normal_calls)

        self.assertEqual(calls['duplicates'].count('normal'), 2)
        self.assertEqual(calls['duplicates'].count('printf'), 2,
                         "expected 2 printf calls in {}"
                         .format(calls['duplicates']))
        self.assertIn('func_ptr_4', calls['duplicates'])
        self.assertIn('func_ptr_5', calls['duplicates'])

    def test_sections(self):
        res, funcs, calls = self.do_parse(sections=['.plt', '.text'])

        # check that we have got rid of the @s in the names
        self.assertNotIn('@', ''.join(funcs.keys()),
                         "check names are extracted correctly")
        self.assertIn('__gmon_start__', funcs,
                      "see a function defined only in .plt")
82
83
84if __name__ == '__main__':
85 unittest.main()
086
=== added directory 'src/sextant/test_resources'
=== added file 'src/sextant/test_resources/parser_test'
1Binary files src/sextant/test_resources/parser_test 1970-01-01 00:00:00 +0000 and src/sextant/test_resources/parser_test 2014-10-23 12:33:12 +0000 differ87Binary files src/sextant/test_resources/parser_test 1970-01-01 00:00:00 +0000 and src/sextant/test_resources/parser_test 2014-10-23 12:33:12 +0000 differ
=== added file 'src/sextant/test_resources/parser_test.c'
--- src/sextant/test_resources/parser_test.c 1970-01-01 00:00:00 +0000
+++ src/sextant/test_resources/parser_test.c 2014-10-23 12:33:12 +0000
@@ -0,0 +1,57 @@
1// COMMENT
2#include<stdio.h>
3
4static int
5normal(int a);
6
7static int
8wierd$name(int a);
9
10typedef int (*pointer)(int);
11
/*
 * Parser fixture: makes one call of each kind - a direct call to another
 * static function, a call to printf and an indirect call through a
 * function pointer. Returns its argument unchanged.
 */
static int
normal(int a)
{
    /* call a normal func,
     * a stub and a pointer
     */
    pointer ptr = wierd$name;

    wierd$name(a);
    printf("%d\n", a);
    ptr(a);

    return (a);
}
26
/*
 * Parser fixture: a function that calls nothing, whose name contains a '$'
 * character. Returns its argument unchanged.
 */
static int
wierd$name(int a)
{
    return (a);
}
32
/*
 * Parser fixture: makes every kind of call twice - printf, the static
 * function normal() and an indirect call through a function pointer.
 * Returns its argument unchanged.
 */
static int
duplicates(int a)
{
    pointer ptr1 = wierd$name;

    /* check stubs don't get duplicated */
    printf("first %d\n", a);
    printf("second %d\n", a);

    /* the same direct call, repeated */
    normal(a);
    normal(a);

    /* the same indirect call, repeated */
    ptr1(a);
    ptr1(a);

    return (a);
}
50
/*
 * Entry point of the fixture: calls normal() and duplicates() once each
 * and returns 0.
 */
int
main(void)
{
    normal(1);
    duplicates(1);
    return (0);
}
058
=== added file 'src/sextant/test_resources/parser_test.dump'
--- src/sextant/test_resources/parser_test.dump 1970-01-01 00:00:00 +0000
+++ src/sextant/test_resources/parser_test.dump 2014-10-23 12:33:12 +0000
@@ -0,0 +1,44 @@
1Disassembly of section .init:
2080482b4 <_init>:
3 80482b8: call 8048350 <__x86.get_pc_thunk.bx>
4 80482cd: call 8048300 <__gmon_start__@plt>
5Disassembly of section .plt:
6080482e0 <printf@plt-0x10>:
7080482f0 <printf@plt>:
808048300 <__gmon_start__@plt>:
908048310 <__libc_start_main@plt>:
10Disassembly of section .text:
1108048320 <_start>:
12 804833c: call 8048310 <__libc_start_main@plt>
1308048350 <__x86.get_pc_thunk.bx>:
1408048360 <deregister_tm_clones>:
15 8048386: call *%eax
1608048390 <register_tm_clones>:
17 80483c3: call *%edx
18080483d0 <__do_global_dtors_aux>:
19 80483df: call 8048360 <deregister_tm_clones>
20080483f0 <frame_dummy>:
21 804840f: call *%eax
220804841d <normal>:
23 8048430: call 8048458 <wierd$name>
24 8048443: call 80482f0 <printf@plt>
25 8048451: call *%eax
2608048458 <wierd$name>:
2708048460 <duplicates>:
28 804847b: call 80482f0 <printf@plt>
29 804848e: call 80482f0 <printf@plt>
30 8048499: call 804841d <normal>
31 80484a4: call 804841d <normal>
32 80484b2: call *%eax
33 80484bd: call *%eax
34080484c4 <main>:
35 80484d4: call 804841d <normal>
36 80484e0: call 8048460 <duplicates>
37080484f0 <__libc_csu_init>:
38 80484f6: call 8048350 <__x86.get_pc_thunk.bx>
39 804850e: call 80482b4 <_init>
40 804853b: call *-0xf8(%ebx,%edi,4)
4108048560 <__libc_csu_fini>:
42Disassembly of section .fini:
4308048564 <_fini>:
44 8048568: call 8048350 <__x86.get_pc_thunk.bx>
045
=== added file 'src/sextant/test_sshmanager.py'
--- src/sextant/test_sshmanager.py 1970-01-01 00:00:00 +0000
+++ src/sextant/test_sshmanager.py 2014-10-23 12:33:12 +0000
@@ -0,0 +1,72 @@
1#!/usr/bin/python3
2import unittest
3import sshmanager
4import sshmanager
5import os
# Use a scratch directory next to this test file rather than an absolute
# path that only exists on one developer's machine.
sshmanager.TMP_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'test_resources', 'tmp')
7
8
class TestSequence(unittest.TestCase):
    """Tests for SSHManager; they open real ssh connections to localhost."""

    def setUp(self):
        self.manager = None

    def tearDown(self):
        # Close a manager that a failed test left open.
        if self.manager:
            self.manager.close()
            self.manager = None

    def get_manager(self, local_port=9643, remote_host='localhost',
                    remote_port=9643, ssh_user=None):
        """Build an SSHManager, by default tunnelling 9643 to localhost."""
        return sshmanager.SSHManager(local_port, remote_host,
                                     remote_port, ssh_user)

    def test_init(self):
        for bad_port in ({'local_port': 'invalid port'},
                         {'remote_port': 'invalid port'}):
            self.assertRaises(ValueError, self.get_manager, **bad_port)

    def test_connect(self):
        # make a connection to localhost and ensure that tmp is created
        self.manager = self.get_manager()
        tmp_dir = self.manager._tmp_dir
        self.assertTrue(os.path.isdir(tmp_dir))
        self.manager.close()
        self.assertFalse(os.path.isdir(tmp_dir))
        self.manager = None

        # check connection failure
        self.assertRaises(sshmanager.SSHConnectionError, self.get_manager,
                          remote_host='invalid host')

    def test_files(self):
        genuine_file = 'test_resources/parser_test.c'
        genuine_file2 = 'test_resources/parser_test'
        absent_file = 'absent_file'
        genuine_files = [genuine_file, genuine_file2]

        self.manager = self.get_manager()

        def copied_path(local_path):
            # where a sent file ends up inside the temporary directory
            return os.path.join(self.manager._tmp_dir,
                                local_path.split('/')[-1])

        # check sending no files fails
        self.assertRaises(ValueError, self.manager.send_to_tmp_dir, [])
        # and sending a non-existent file
        self.assertRaises(ValueError, self.manager.send_to_tmp_dir,
                          [absent_file, genuine_file])

        self.manager.send_to_tmp_dir(genuine_files)
        for local_path in genuine_files:
            self.assertTrue(os.path.isfile(copied_path(local_path)))

        self.manager.remove_from_tmp_dir(genuine_files)
        for local_path in genuine_files:
            self.assertFalse(os.path.isfile(copied_path(local_path)))

        self.manager.close()
        self.manager = None
61
62
63if __name__ == '__main__':
64 # no coverage for:
65 # specifying ssh user
66 # scp failure
67 # an error in closing the ssh connection
68 # another error in closing the ssh connection
69 # mkdir failure
70 # rmdir failure
71 unittest.main()
72
073
=== modified file 'src/sextant/update_db.py'
--- src/sextant/update_db.py 2014-09-29 14:01:39 +0000
+++ src/sextant/update_db.py 2014-10-23 12:33:12 +0000
@@ -5,72 +5,106 @@
5# -----------------------------------------5# -----------------------------------------
6# Given a program file to upload, or a program name to delete from the server, does the right thing.6# Given a program file to upload, or a program name to delete from the server, does the right thing.
77
8from __future__ import print_function
9
8__all__ = ("upload_program", "delete_program")10__all__ = ("upload_program", "delete_program")
911
10from .db_api import SextantConnection, Validator12from .db_api import SextantConnection
11from .objdump_parser import get_parsed_objects13from .sshmanager import SSHConnectionError
14from .objdump_parser import Parser, run_objdump
12from os import path15from os import path
16from time import time
17import subprocess
18import sys
1319
14import logging20import logging
1521
1622def upload_program(connection, user_name, file_path, program_name=None,
17def upload_program(user_name, file_path, db_url, display_url='',23 not_object_file=False):
18 alternative_name=None, not_object_file=False):24 """
19 """25 Upload a program's functions and call graph to the database.
20 Uploads a program to the remote database.26
2127 Arguments:
22 Raises requests.exceptions.ConnectionError if the server didn't exist.28 connection:
23 Raises IOError if file_path doesn't correspond to a file.29 The SextantConnection object that manages the database connection.
24 Raises ValueError if the desired alternative_name (or the default, if no30 user_name:
25 alternative_name was specified) already exists in the database.31 The user name of the user uploading the program.
26 :param file_path: the path to the local file we wish to upload32 file_path:
27 :param db_url: the URL of the database (eg. http://localhost:7474)33 The path to either: the output of objdump (if not_object_file is
28 :param display_url: alternative URL to display instead of db_url34 True) OR to a binary file if (not_object_file is False).
29 :param alternative_name: a name to give the program to override the default35 program_name:
30 :param object_file: bool(the file is an objdump text output file, rather than a compiled binary)36 An optional name to give the program in the database, if not
3137 specified then <user_name>-<file name> will be used.
32 """38 not_object_file:
3339 Flag controlling whether file_path is pointing to a dump file or
34 if not display_url:40 a binary file.
35 display_url = db_url41 """
3642 if not connection._ssh:
37 # if no name is specified, use the form "<username>-<binary name>"43 raise SSHConnectionError('An SSH connection is required for '
38 name = alternative_name or (user_name + '-' + path.split(file_path)[-1])44 'program upload.')
3945
40 connection = SextantConnection(db_url)46 if not program_name:
4147 file_no_ext = path.basename(file_path).split('.')[0]
42 program_names = connection.get_program_names()48 program_name = '{}-{}'.format(user_name, file_no_ext)
43 if Validator.sanitise(name) in program_names:49
44 raise ValueError("There is already a program with name {}; "50
45 "please delete the previous one with the same name "51 if program_name in connection.get_program_names():
46 "and retry, or rename the input file.".format(name))52 raise ValueError('A program with name `{}` already exists in the database'
4753 .format(program_name))
48 parsed_objects = get_parsed_objects(filepath=file_path,54
49 sections_to_view=['.text'],55
50 not_object_file=not_object_file,56 print('Uploading `{}` to the database. '
51 ignore_function_pointers=False)57 'This may take some time for larger programs.'
5258 .format(program_name))
53 logging.info('Objdump has parsed!')59 start = time()
5460
55 program_representation = connection.new_program(Validator.sanitise(name))61 if not not_object_file:
5662 print('Generating dump file...', end='')
57 for obj in parsed_objects:63 sys.stdout.flush()
58 for called in obj.what_do_i_call:64 file_path, file_object = run_objdump(file_path)
59 if not program_representation.add_function_call(obj.name, called[-1]): # called is a tuple (address, name)65 print('done.')
60 logging.error('Validation error: {} calling {}'.format(obj.name, called[-1]))66 else:
6167 file_object = None
62 logging.info('Sending {} named objects to server {}...'.format(len(parsed_objects), display_url))68
63 program_representation.commit()69 # Make parser and wire to DBprogram.
64 logging.info('Successfully added {}.'.format(name))70 with connection.new_program(program_name) as program:
6571
6672 def start_parser(program):
67def delete_program(program_name, db_url):73 print('Parsing dump file...', end='')
68 """74 sys.stdout.flush()
69 Deletes a program with the specified name from the database.75
70 :param program_name: the name of the program to delete76 def finish_parser(parser, program):
71 :param db_url: the URL of the database (eg. http://localhost:7474)77 # Callback to make sure the program's csv files are flushed when
72 :return: bool(success)78 # the parser completes.
73 """79 program.func_writer.finish()
74 connection = SextantConnection(db_url)80 program.call_writer.finish()
81
82 print('done: {} functions and {} calls.'
83 .format(parser.function_count, parser.call_count))
84
85 parser = Parser(file_path = file_path, file_object = file_object,
86 sections=[],
87 add_function = program.add_function,
88 add_call = program.add_call,
89 started=lambda parser: start_parser(program),
90 finished=lambda parser: finish_parser(parser, program))
91 parser.parse()
92
93 program.commit()
94
95 end = time()
96 print('Finished in {:.2f}s.'.format(end-start))
97
98
99def delete_program(connection, program_name):
100 """
101 Remove the specified program from the database.
102
103 Arguments:
104 connection:
105 The SextantConnection object managing the database connection.
106 program_name:
107 The name of the program to remove from the database.
108 """
75 connection.delete_program(program_name)109 connection.delete_program(program_name)
76 print('Successfully deleted {}.'.format(program_name))110
77111
=== modified file 'src/sextant/web/server.py'
--- src/sextant/web/server.py 2014-10-03 11:47:52 +0000
+++ src/sextant/web/server.py 2014-10-23 12:33:12 +0000
@@ -26,7 +26,8 @@
2626
27from cgi import escape # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 227from cgi import escape # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 2
2828
29database_url = None # the URL to access the database instance29# global SextantConnection object which deals with the port forwarding
30CONNECTION = None
3031
31RESPONSE_CODE_OK = 20032RESPONSE_CODE_OK = 200
32RESPONSE_CODE_BAD_REQUEST = 40033RESPONSE_CODE_BAD_REQUEST = 400
@@ -67,25 +68,6 @@
6768
68class SVGRenderer(Resource):69class SVGRenderer(Resource):
6970
70 def error_creating_neo4j_connection(self, failure):
71 self.write("Error creating Neo4J connection: %s\n") % failure.getErrorMessage()
72
73 @staticmethod
74 def create_neo4j_connection():
75 return db_api.SextantConnection(database_url)
76
77 @staticmethod
78 def check_program_exists(connection, name):
79 return connection.check_program_exists(name)
80
81 @staticmethod
82 def get_whole_program(connection, name):
83 return connection.get_whole_program(name)
84
85 @staticmethod
86 def get_functions_calling(connection, progname, funcname):
87 return connection.get_all_functions_calling(progname, funcname)
88
89 @staticmethod71 @staticmethod
90 def get_plot(program, suppress_common_functions=False, remove_self_calls=False):72 def get_plot(program, suppress_common_functions=False, remove_self_calls=False):
91 graph_dot = export.ProgramConverter.to_dot(program, suppress_common_functions,73 graph_dot = export.ProgramConverter.to_dot(program, suppress_common_functions,
@@ -111,7 +93,7 @@
111 res_msg = None # set this in the logic93 res_msg = None # set this in the logic
11294
113 #95 #
114 # Get program name and database connection, check if program exists96 # Check if provided program name exists
115 #97 #
11698
117 name = args.get('program_name', [None])[0]99 name = args.get('program_name', [None])[0]
@@ -121,16 +103,7 @@
121 res_msg = "Supply 'program_name' parameter."103 res_msg = "Supply 'program_name' parameter."
122104
123 if res_code is RESPONSE_CODE_OK:105 if res_code is RESPONSE_CODE_OK:
124 try:106 exists = yield deferToThread(CONNECTION.check_program_exists, name)
125 conn = yield deferToThread(self.create_neo4j_connection)
126 except requests.exceptions.ConnectionError:
127 res_code = RESPONSE_CODE_BAD_GATEWAY
128 res_fmt = "Could not reach Neo4j server at {}"
129 res_msg = res_fmt.format(database_url)
130 conn = None
131
132 if res_code is RESPONSE_CODE_OK:
133 exists = yield deferToThread(self.check_program_exists, conn, name)
134 if not exists:107 if not exists:
135 res_code = RESPONSE_CODE_NOT_FOUND108 res_code = RESPONSE_CODE_NOT_FOUND
136 res_fmt = "Program {} not found in database."109 res_fmt = "Program {} not found in database."
@@ -146,28 +119,23 @@
146 # look for in request.args, both tuples119 # look for in request.args, both tuples
147 queries = {120 queries = {
148 'whole_program': (121 'whole_program': (
149 self.get_whole_program,122 CONNECTION.get_whole_program,
150 (conn, name),
151 ()123 ()
152 ),124 ),
153 'functions_calling': (125 'functions_calling': (
154 self.get_functions_calling,126 CONNECTION.get_all_functions_calling,
155 (conn, name),
156 ('func1',)127 ('func1',)
157 ),128 ),
158 'functions_called_by': (129 'functions_called_by': (
159 conn.get_all_functions_called,130 CONNECTION.get_all_functions_called,
160 (name,),
161 ('func1',)131 ('func1',)
162 ),132 ),
163 'all_call_paths': (133 'all_call_paths': (
164 conn.get_call_paths,134 CONNECTION.get_call_paths,
165 (name,),
166 ('func1', 'func2')135 ('func1', 'func2')
167 ),136 ),
168 'shortest_call_path': (137 'shortest_call_path': (
169 conn.get_shortest_path_between_functions,138 CONNECTION.get_shortest_path_between_functions,
170 (name,),
171 ('func1', 'func2')139 ('func1', 'func2')
172 )140 )
173 }141 }
@@ -186,7 +154,7 @@
186154
187 # extract any required keyword arguments from request.args155 # extract any required keyword arguments from request.args
188 if res_code is RESPONSE_CODE_OK:156 if res_code is RESPONSE_CODE_OK:
189 fn, known_args, kwargs = query157 fn, kwargs = query
190 158
191 # all args will be strings - use None to indicate missing argument159 # all args will be strings - use None to indicate missing argument
192 req_args = tuple(args.get(kwarg, [None])[0] for kwarg in kwargs)160 req_args = tuple(args.get(kwarg, [None])[0] for kwarg in kwargs)
@@ -202,9 +170,8 @@
202 # if we are okay here we have a valid query with all required arguments170 # if we are okay here we have a valid query with all required arguments
203 if res_code is RESPONSE_CODE_OK:171 if res_code is RESPONSE_CODE_OK:
204 try:172 try:
205 all_args = known_args + req_args
206 program = yield defer_to_thread_with_timeout(render_timeout, fn,173 program = yield defer_to_thread_with_timeout(render_timeout, fn,
207 *all_args)174 name, *req_args)
208 except defer.CancelledError:175 except defer.CancelledError:
209 # the timeout has fired and cancelled the request176 # the timeout has fired and cancelled the request
210 res_code = RESPONSE_CODE_BAD_REQUEST177 res_code = RESPONSE_CODE_BAD_REQUEST
@@ -247,16 +214,12 @@
247class GraphProperties(Resource):214class GraphProperties(Resource):
248215
249 @staticmethod216 @staticmethod
250 def _get_connection():217 def _get_program_names():
251 return db_api.SextantConnection(database_url)218 return CONNECTION.get_program_names()
252219
253 @staticmethod220 @staticmethod
254 def _get_program_names(connection):221 def _get_function_names(program_name):
255 return connection.get_program_names()222 return CONNECTION.get_function_names(program_name)
256
257 @staticmethod
258 def _get_function_names(connection, program_name):
259 return connection.get_function_names(program_name)
260223
261 @defer.inlineCallbacks224 @defer.inlineCallbacks
262 def _render_GET(self, request):225 def _render_GET(self, request):
@@ -269,18 +232,9 @@
269232
270 query = request.args['query'][0]233 query = request.args['query'][0]
271234
272 try:
273 neo4j_connection = yield deferToThread(self._get_connection)
274 except Exception:
275 request.setResponseCode(502) # Bad Gateway
276 request.write("Could not reach Neo4j server at {}.".format(database_url))
277 request.finish()
278 defer.returnValue(None)
279 neo4j_connection = None # just to silence the "referenced before assignment" warnings
280
281 if query == 'programs':235 if query == 'programs':
282 request.setHeader("content-type", "application/json")236 request.setHeader("content-type", "application/json")
283 prognames = yield deferToThread(self._get_program_names, neo4j_connection)237 prognames = yield deferToThread(self._get_program_names)
284 request.write(json.dumps(list(prognames)))238 request.write(json.dumps(list(prognames)))
285 request.finish()239 request.finish()
286 defer.returnValue(None)240 defer.returnValue(None)
@@ -294,7 +248,7 @@
294 defer.returnValue(None)248 defer.returnValue(None)
295 program_name = request.args['program_name'][0]249 program_name = request.args['program_name'][0]
296250
297 funcnames = yield deferToThread(self._get_function_names, neo4j_connection, program_name)251 funcnames = yield deferToThread(self._get_function_names, program_name)
298 if funcnames is None:252 if funcnames is None:
299 request.setResponseCode(404)253 request.setResponseCode(404)
300 request.setHeader("content-type", "text/plain")254 request.setHeader("content-type", "text/plain")
@@ -319,10 +273,12 @@
319 return NOT_DONE_YET273 return NOT_DONE_YET
320274
321275
322def serve_site(input_database_url='http://localhost:7474', port=2905):276def serve_site(connection, port):
323277 global CONNECTION
324 global database_url278
325 database_url = input_database_url279 CONNECTION = connection
280
281
326 # serve static directory at root282 # serve static directory at root
327 root = File(os.path.join(environment.RESOURCES_DIR, 'sextant', 'web'))283 root = File(os.path.join(environment.RESOURCES_DIR, 'sextant', 'web'))
328284

Subscribers

People subscribed via source and target branches