1
=== modified file 'src/sextant/__main__.py'
2
--- src/sextant/__main__.py	2014-10-03 13:00:52 +0000
3
+++ src/sextant/__main__.py	2014-10-23 12:33:12 +0000
4
@@ -9,7 +9,6 @@
5
9
9
6
10
import io
10
import io
7
11
import sys
11
import sys
8
12
import random
9
13
import socket
12
import socket
10
14
import logging
13
import logging
11
15
import logging.config
14
import logging.config
12
@@ -28,10 +27,12 @@
13
28
from . import db_api
27
from . import db_api
14
29
from . import update_db
28
from . import update_db
15
30
from . import environment
29
from . import environment
16
30
from . import sshmanager
17
31
31
18
32
config = environment.load_config()
32
config = environment.load_config()
19
33
33
20
34
34
21
35
22
35
def _displayable_url(args):
36
def _displayable_url(args):
23
36
    """
37
    """
24
37
    Return the URL specified by the user for Sextant to look at.
38
    Return the URL specified by the user for Sextant to look at.
25
@@ -56,7 +57,7 @@
26
56
57
27
57
# Beginning of functions which handle the actual invocation of Sextant
58
# Beginning of functions which handle the actual invocation of Sextant
28
58
59
30
59
def _start_web(args):
60
def _start_web(connection, args):
31
60
    # Don't import at top level - makes twisted dependency semi-optional,
61
    # Don't import at top level - makes twisted dependency semi-optional,
32
61
    # allowing non-web functionality to work with Python 3.
62
    # allowing non-web functionality to work with Python 3.
33
62
    if sys.version_info[0] == 2:
63
    if sys.version_info[0] == 2:
34
@@ -68,12 +69,12 @@
35
68
    logging.info("Serving site on port {}".format(args.port))
69
    logging.info("Serving site on port {}".format(args.port))
36
69
70
37
70
    # server is .web.server, imported a couple of lines ago
71
    # server is .web.server, imported a couple of lines ago
42
71
    server.serve_site(input_database_url=args.remote_neo4j, port=args.port)
72
    server.serve_site(connection, args.port)
43
72
73
44
73
74
45
74
def _audit(args):
75
def _audit(connection, args):
46
75
    try:
76
    try:
48
76
        audited = query.audit(args.remote_neo4j)
77
        audited = query.audit(connection)
49
77
    except requests.exceptions.ConnectionError as e:
78
    except requests.exceptions.ConnectionError as e:
50
78
        msg = 'Connection error to server {url}: {exception}'
79
        msg = 'Connection error to server {url}: {exception}'
51
79
        logging.error(msg.format(url=_displayable_url(args), exception=e))
80
        logging.error(msg.format(url=_displayable_url(args), exception=e))
52
@@ -87,8 +88,8 @@
53
87
        titles = ("Name", "#Func", "Uploader", "User-ID", "Upload Date")
88
        titles = ("Name", "#Func", "Uploader", "User-ID", "Upload Date")
54
88
        colminlens = (len(entry) for entry in titles)
89
        colminlens = (len(entry) for entry in titles)
55
89
        # maximum lengths to avoid one entry from throwing the whole table
90
        # maximum lengths to avoid one entry from throwing the whole table
58
90
        # date format is <YYYY:MM:DD HH:MM:SS.UUUUUU> = 26 characters
91
        # date format is <YYYY-MM-DD HH:MM:SS> = 19 characters
59
91
        COLMAXLENS = (25, 5, 25, 10, 26)
92
        COLMAXLENS = (25, 6, 25, 10, 19)
60
92
93
61
93
        # make a table of the strings of each data entry we will display
94
        # make a table of the strings of each data entry we will display
62
94
        text = [map(str, (p.program_name, p.number_of_funcs,
95
        text = [map(str, (p.program_name, p.number_of_funcs,
63
@@ -120,7 +121,7 @@
64
120
        print('\n'.join(st.format(*pentry) for pentry in text))
121
        print('\n'.join(st.format(*pentry) for pentry in text))
65
121
122
66
122
123
68
123
def _add_program(args):
124
def _add_program(connection, args):
69
124
    try:
125
    try:
70
125
        alternative_name = args.name_in_db[0]
126
        alternative_name = args.name_in_db[0]
71
126
    except TypeError:
127
    except TypeError:
72
@@ -131,12 +132,11 @@
73
131
    # unsupplied
132
    # unsupplied
74
132
133
75
133
    try:
134
    try:
82
134
        update_db.upload_program(user_name=getpass.getuser(),
135
        update_db.upload_program(connection, 
83
135
                                 file_path=args.input_file,
136
                                 getpass.getuser(),
84
136
                                 db_url=args.remote_neo4j,
137
                                 args.input_file,
85
137
                                 alternative_name=alternative_name,
138
                                 alternative_name,
86
138
                                 not_object_file=not_object_file,
139
                                 not_object_file)
81
139
                                 display_url=_displayable_url(args))
87
140
    except requests.exceptions.ConnectionError as e:
140
    except requests.exceptions.ConnectionError as e:
88
141
        msg = 'Connection error to server {}: {}'
141
        msg = 'Connection error to server {}: {}'
89
142
        logging.error(msg.format(_displayable_url(args), e))
142
        logging.error(msg.format(_displayable_url(args), e))
90
@@ -147,41 +147,41 @@
91
147
        logging.error('Input file {} was not found.'.format(args.input_file[0]))
147
        logging.error('Input file {} was not found.'.format(args.input_file[0]))
92
148
        logging.error(e)
148
        logging.error(e)
93
149
        logging.debug(e, exc_info=True)
149
        logging.debug(e, exc_info=True)
95
150
    except ValueError as e:
150
    except (ValueError, sshmanager.SSHConnectionError) as e:
96
151
        logging.error(e)
151
        logging.error(e)
97
152
152
98
153
153
105
154
def _delete_program(connection, args):
    """
    Handle the program-deletion subcommand.

    Asks update_db.delete_program to remove the program called
    args.program_name, using the supplied connection.

    :param connection: the connection object handed to every subcommand
        handler; forwarded unchanged to update_db.delete_program
    :param args: parsed command-line namespace; only program_name is read

    """
    update_db.delete_program(connection, args.program_name)
156
108
157
157
109
158
158
def _make_query(connection, args):
    """
    Handle the query subcommand by delegating to query.query.

    Optional command-line values are unpacked defensively: an option that
    cannot be indexed (TypeError) or that holds too few entries (IndexError,
    funcs only) falls back to a default instead of aborting.

    :param connection: accepted so this handler has the same
        (connection, args) signature as the other subcommand handlers; it is
        not used here - args.remote_neo4j is passed to query.query instead.
        NOTE(review): confirm whether query.query should take the connection.
    :param args: parsed command-line namespace; reads funcs, program,
        suppress_common, query and remote_neo4j

    """
    # Up to two function names may be supplied; any that are absent stay None.
    arg1 = None
    arg2 = None
    try:
        arg1 = args.funcs[0]
        arg2 = args.funcs[1]
    except (TypeError, IndexError):
        pass

    try:
        program_name = args.program[0]
    except TypeError:
        program_name = None

    try:
        suppress_common = args.suppress_common[0]
    except TypeError:
        suppress_common = False

    query.query(remote_neo4j=args.remote_neo4j,
                display_neo4j=_displayable_url(args),
                input_query=args.query,
                program_name=program_name,
                argument_1=arg1,
                argument_2=arg2,
                suppress_common=suppress_common)
145
186
186
146
187
# End of functions which invoke Sextant
187
# End of functions which invoke Sextant
147
@@ -197,8 +197,10 @@
148
197
197
149
198
    """
198
    """
150
199
199
153
200
    argumentparser = argparse.ArgumentParser(prog='sextant', usage='sextant', description="Invoke part of the SEXTANT program")
200
    ap = argparse.ArgumentParser(prog='sextant',
154
201
    subparsers = argumentparser.add_subparsers(title="subcommands")
201
                                 usage='sextant', 
155
202
                                 description="Invoke part of the SEXTANT program")
156
203
    subparsers = ap.add_subparsers(title="subcommands")
157
202
204
158
203
    #set what will be defined as a "common function"
205
    #set what will be defined as a "common function"
159
204
    db_api.set_common_cutoff(config.common_cutoff)
206
    db_api.set_common_cutoff(config.common_cutoff)
160
@@ -257,10 +259,9 @@
161
257
        parsers[key].add_argument('--remote-neo4j', metavar="URL",
259
        parsers[key].add_argument('--remote-neo4j', metavar="URL",
162
258
                                  help="URL of neo4j server", type=str,
260
                                  help="URL of neo4j server", type=str,
163
259
                                  default=config.remote_neo4j)
261
                                  default=config.remote_neo4j)
168
260
        parsers[key].add_argument('--use-ssh-tunnel', metavar="BOOL", type=str,
262
        parsers[key].add_argument('--no-ssh-tunnel',
169
261
                                  help="whether to SSH into the remote server,"
263
                                  help='Disable ssh tunnelling. Prevents program upload.',
170
262
                                       "True/False",
264
                                  action='store_true')
167
263
                                  default=str(config.use_ssh_tunnel))
171
264
        parsers[key].add_argument('--ssh-user', metavar="NAME", type=str,
265
        parsers[key].add_argument('--ssh-user', metavar="NAME", type=str,
172
265
                                  help="username to use as remote SSH name",
266
                                  help="username to use as remote SSH name",
173
266
                                  default=str(config.ssh_user))
267
                                  default=str(config.ssh_user))
174
@@ -273,207 +274,28 @@
175
273
274
176
274
    # parse the arguments
275
    # parse the arguments
177
275
276
308
276
    return argumentparser.parse_args()
277
    return ap.parse_args()
179
277
180
278
181
279
def _start_tunnel(local_port, remote_host, remote_port, ssh_user=''):
182
280
    """
183
281
    Creates an SSH port-forward.
184
282
185
283
    This will result in localhost:local_port appearing to be
186
284
    remote_host:remote_port.
187
285
188
286
    :param local_port: integer port number to open at localhost
189
287
    :param remote_host: string address of remote host (no port number)
190
288
    :param remote_port: port to 'open' on the remote host
191
289
    :param ssh_user: user to log in on the remote_host as
192
290
193
291
    """
194
292
195
293
    if not (isinstance(local_port, int) and local_port > 0):
196
294
        raise ValueError(
197
295
            'Local port {} must be a positive integer.'.format(local_port))
198
296
    if not (isinstance(remote_port, int) and remote_port > 0):
199
297
        raise ValueError(
200
298
            'Remote port {} must be a positive integer.'.format(remote_port))
201
299
202
300
    logging.debug('Starting SSH tunnel...')
203
301
204
302
    # this cmd string will be .format()ed in a few lines' time
205
303
    cmd = ['ssh']
206
304
207
305
    if ssh_user:
208
306
        # ssh -l {user} ... sets the remote login username
209
307
        cmd += ['-l', ssh_user]
210
308
211
309
    # -L localport:localhost:remoteport forwards the port
212
310
    # -M makes SSH able to accept slave connections
213
311
    # -S sets the location of a control socket (in this case, sextant-controller
214
312
    #    with a unique identifier appended, just in case we run sextant twice
215
313
    #    simultaneously), so we know how to close the port again
216
314
    # -f goes into background; -N does not execute a remote command;
217
315
    # -T says to remote host that we don't want a text shell.
218
316
    cmd += ['-M',
219
317
            '-S', 'sextantcontroller{tunnel_id}'.format(tunnel_id=local_port),
220
318
            '-fNT',
221
319
            '-L', '{0}:localhost:{1}'.format(local_port, remote_port),
222
320
            remote_host]
223
321
224
322
    logging.debug('Running {}'.format(' '.join(cmd)))
225
323
226
324
    exit_code = subprocess.call(cmd)
227
325
    if exit_code:
228
326
        raise OSError('SSH setup failed with error {}'.format(exit_code))
229
327
230
328
    logging.debug('SSH tunnel created.')
231
329
232
330
233
331
def _stop_tunnel(local_port, remote_host):
    """
    Tear down an SSH port-forward previously set up with _start_tunnel.

    local_port identifies the tunnel: it names the control socket
    ('sextantcontroller<local_port>') through which the running ssh master
    is told to exit.

    A non-zero exit status from ssh is logged as a warning, not raised.

    :param local_port: the port on localhost we are using as the entrypoint
    :param remote_host: remote host we tunnelled into

    """
    logging.debug('Shutting down SSH tunnel...')

    # ssh -O sends a command to the master reachable through the -S socket
    cmd = ['ssh',
           '-S', 'sextantcontroller{}'.format(local_port),
           '-O', 'exit',
           '-q',  # for quiet
           remote_host]

    # SSH has a bug on some systems which causes it to ignore the -q flag,
    # meaning it prints "Exit request sent." to stderr. We capture stderr
    # and suppress exactly that message.
    pr = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    _, stderr = pr.communicate()

    # communicate() returns bytes on Python 3; without decoding, the
    # comparison below could never match and the notice always leaked out.
    if not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')
    message = stderr.rstrip()

    # Stay silent when ssh printed nothing or only the harmless notice.
    if message and message != 'Exit request sent.':
        sys.stderr.write(message + '\n')

    if pr.returncode == 0:
        logging.debug('Shut down successfully.')
    else:
        logging.warning(
            'SSH tunnel shutdown returned error code {}'.format(pr.returncode))
        logging.warning(message)
266
364
267
365
268
366
def _is_port_used(port):
269
367
    """
270
368
    Checks with the OS to see whether a port is open.
271
369
272
370
    Beware: port is passed directly to the shell. Make sure it is an integer.
273
371
    We raise ValueError if it is not.
274
372
    :param port: integer port to check for openness
275
373
    :return: bool(port is in use)
276
374
277
375
    """
278
376
279
377
    # we follow http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
280
378
    if not (isinstance(port, int) and port > 0):
281
379
        raise ValueError('port {} must be a positive integer.'.format(port))
282
380
283
381
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
284
382
    try:
285
383
        sock.bind(('127.0.0.1', port))
286
384
    except socket.error as e:
287
385
        if e.errno == 98:  # Address already in use
288
386
            return True
289
387
        raise
290
388
291
389
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
292
390
293
391
    return False  # that is, the port is not used
294
392
295
393
296
394
def _get_unused_port():
    """
    Pick a random port in the range 10000-50000 (inclusive) that
    _is_port_used reports as free, retrying until one is found.

    """
    while True:
        candidate = random.randint(10000, 50000)
        if not _is_port_used(candidate):
            return candidate
307
405
309
406
278
310
407
def _get_host_and_port(url):
279
def _get_host_and_port(url):
327
408
    """Given a URL as http://host:port, returns (host, port)."""
280
        """Given a URL as http://host:port, returns (host, port)."""
328
409
    parsed = parse.urlparse(url)
281
        parsed = parse.urlparse(url)
329
410
    return (parsed.hostname, parsed.port)
282
        return (parsed.hostname, parsed.port)
314
411
315
412
316
413
def _is_localhost(host, port):
317
414
    """
318
415
    Checks whether a host is an alias to localhost.
319
416
320
417
    Raises socket.gaierror if the host was not found.
321
418
322
419
    """
323
420
324
421
    addr = socket.getaddrinfo(host, port)[0][4][0]
325
422
326
423
    return addr in ('127.0.0.1', '::1')
330
424
283
331
425
284
332
426
def main():
    """
    Entry point: parse the command line and run the chosen subcommand.

    The host and port are taken from args.remote_neo4j and, together with
    args.no_ssh_tunnel, used to open a db_api.SextantConnection. The
    connection is used as a context manager for the duration of the call
    to args.func (presumably the subcommand handler registered in
    parse_arguments - confirm there), which receives (connection, args).

    An sshmanager.SSHConnectionError is reported on stdout rather than
    propagated as a traceback.

    """
    args = parse_arguments()

    remotehost, remoteport = _get_host_and_port(args.remote_neo4j)

    try:
        conn_args = (remotehost, remoteport, args.no_ssh_tunnel)
        with db_api.SextantConnection(*conn_args) as connection:
            args.func(connection, args)
    except sshmanager.SSHConnectionError as e:
        # Exceptions have no .message attribute on Python 3 unless the class
        # defines one, so fall back to the string form of the exception.
        print(getattr(e, 'message', None) or str(e))
381
475
382
476
395
477
if __name__ == '__main__':
299
if __name__ == '__main__':
396
478
    main()
300
    main()
397
479
301
398
480
302
399
=== added file 'src/sextant/csvwriter.py'
400
--- src/sextant/csvwriter.py	1970-01-01 00:00:00 +0000
401
+++ src/sextant/csvwriter.py	2014-10-23 12:33:12 +0000
402
@@ -0,0 +1,152 @@
403
1
import logging
404
2
405
3
"""
406
4
Provide a class for writing to row-limited csv files.
407
5
"""
408
6
__all__ = ('CSVWriter',)
409
7
410
8
411
9
class CSVWriter(object):
    """
    Write to csv files, automatically opening new ones at row maximum.

    Provides a write(*args) method which adds a row to the currently open
    csv file (internally managed) if there is room in it; otherwise the
    file is closed, a new one is silently opened and the row goes there.

    Values are interpolated verbatim with str.format and joined by commas.
    Nothing is quoted or escaped, so values must not themselves contain
    commas or newlines.

    Attributes:
        base_path:
            The base path of the output files - which will have a full path
            of form "<base_path><number>.csv"
        headers:
            A list or tuple of strings which will be used as the column
            headers. Attempts to write a row of data will induce a check
            that the length of the data provided is exactly that of this
            argument.
        max_rows:
            The maximum number of rows to write in each file (including the
            header row) before opening a new file.

        _fmt:
            The format string which will be used to write a row to the csv
            file. Of form '{},{},...,{}\n'.
        _file:
            The currently open file (or the last one, once closed).
        _file_count:
            The number of files that the CSVWriter has written to. The next
            file to be opened will have name '<base_path><_file_count>.csv'
        _row_count:
            The number of rows (including the header row) in the current file.
        _total_row_count:
            The number of rows (including the header rows) in ALL files.

    """
    # Filename fmt of output files - used with .format(base_path, number).
    _file_fmt = '{}{}.csv'

    def __init__(self, base_path, headers, max_rows):
        """
        Initialise the writer and open the first file (number 0).

        Arguments:
            base_path:
                The base path of the output files - which will have a full path
                of form "<base_path><number>.csv"
            headers:
                A list or tuple of strings which will be used as the column
                headers. Attempts to write a row of data will induce a check
                that the length of the data provided is exactly that of this
                argument.
            max_rows:
                The maximum number of rows to write in each file (including the
                header row) before opening a new file. A value below 2 leaves
                no room for data under the header; in that case every
                write() starts a new file holding the header and that row.
        """
        self.base_path = base_path
        self.headers = headers
        self.max_rows = max_rows

        self._fmt = ','.join('{}' for _ in headers) + '\n'

        # The number of the file we are on and the line in it.
        self._file = None
        self._file_count = 0
        self._row_count = 0

        self._total_row_count = 0

        self._open_new_file()

    def _open_new_file(self):
        """
        Close the current file (if any) and open the next one, writing the
        headers in its first row.
        """
        self._close_file()

        path = CSVWriter._file_fmt.format(self.base_path, self._file_count)
        self._file = open(path, 'w+')
        self._file_count += 1
        # The header goes straight to the file instead of through write(),
        # so opening a file can never trigger another rotation.
        self._write_row(self.headers)

    def _write_row(self, row):
        """
        Format one row into the current file and update both row counters.
        """
        self._file.write(self._fmt.format(*row))
        self._row_count += 1
        self._total_row_count += 1

    def _close_file(self):
        """
        Close the current file and reset the per-file row counter.

        NOTE that this method should ALWAYS be called before attempting to read
        from the file as it ensures that all changes have been written to disk,
        not only buffered.
        """
        if self._file and not self._file.closed:
            logging.debug('csvwriter wrote {} lines to {}'
                          .format(self._row_count, self._file.name))
            self._file.close()

        self._row_count = 0

    def write(self, *args):
        """
        Add a row to the current file, or to a new one if max_rows is reached.

        The check against max_rows is made BEFORE writing the line. A new
        file is also started when the current one has been closed, e.g. by
        a preceding call to finish().

        Raises:
            ValueError:
                If the length of *args is not exactly the length of
                self.headers - i.e. on attempt to write too many/too few items.

        Arguments:
            *args:
                Values which will be written into the columns of the current
                open csv file.
        """
        if len(args) != len(self.headers):
            msg = 'Attempted to write {} entries to file {} with {} columns'
            raise ValueError(msg.format(len(args), self.base_path,
                                        len(self.headers)))

        needs_new_file = (self._file is None
                          or self._file.closed
                          or self._row_count >= self.max_rows)
        if needs_new_file:
            self._open_new_file()

        self._write_row(args)

    def file_iter(self):
        """
        Return an iterator over the names of the files the writer has
        written to.
        """
        fmt = CSVWriter._file_fmt
        return (fmt.format(self.base_path, i) for i in range(self._file_count))

    def finish(self):
        """
        Flush and close the current file. If a subsequent call to self.write
        is made, a new file will be created to contain it.

        Return the number of files we have written to and the total number
        of lines we have written.
        """
        self._close_file()
        return self._file_count, self._total_row_count
554
152
555
0
153
556
=== modified file 'src/sextant/db_api.py'
557
--- src/sextant/db_api.py	2014-09-03 14:10:07 +0000
558
+++ src/sextant/db_api.py	2014-10-23 12:33:12 +0000
559
@@ -5,208 +5,348 @@
560
5
# -----------------------------------------
5
# -----------------------------------------
561
6
# API to interact with a Neo4J server: upload, query and delete programs in a DB
6
# API to interact with a Neo4J server: upload, query and delete programs in a DB
562
7
7
564
8
__all__ = ("Validator", "AddToDatabase", "FunctionQueryResult", "Function",
8
from __future__ import print_function
565
9
566
10
__all__ = ("validate_query", "DBProgram", "FunctionQueryResult", "Function",
567
9
           "SextantConnection")
11
           "SextantConnection")
568
10
12
569
13
from sys import stdout
570
14
571
11
import re  # for validation of function/program names
15
import re  # for validation of function/program names
572
12
import logging
16
import logging
573
13
from datetime import datetime
17
from datetime import datetime
574
14
import os
18
import os
575
15
import getpass
19
import getpass
576
16
from collections import namedtuple
20
from collections import namedtuple
581
17
21
import random
582
18
from neo4jrestclient.client import GraphDatabase
22
import socket
583
19
import neo4jrestclient.client as client
23
584
20
24
import itertools
585
25
import subprocess
586
26
from time import time
587
27
588
28
import neo4jrestclient.client as neo4jrestclient
589
29
590
30
from sshmanager import SSHManager, SSHConnectionError 
591
31
from csvwriter import CSVWriter
592
32
593
33
# The directory on the local machine to which csv files will be written
594
34
# prior to copy over to the remote server.
595
35
TMP_DIR = '/tmp/sextant'
596
36
597
37
# A function is deemed 'common' if it has more than this
598
38
# many connections.
599
21
COMMON_CUTOFF = 10
39
COMMON_CUTOFF = 10
644
22
# a function is deemed 'common' if it has more than this
40
645
23
# many connections
41
646
24
42
647
25
43
def set_common_cutoff(common_def):
648
26
class Validator():
44
    """
649
27
    """ Sanitises/checks strings, to prevent Cypher injection attacks"""
45
    Sets the number of incoming connections at which we deem a function 'common'
650
28
46
    Default is 10 (which is used if this method is never called).
651
29
    @staticmethod
47
    :param common_def: number of incoming connections
652
30
    def validate(input_):
48
    """
653
31
        """
49
    global COMMON_CUTOFF
654
32
        Checks whether we can allow a string to be passed into a Cypher query.
50
    COMMON_CUTOFF = common_def
655
33
        :param input_: the string we wish to validate
51
656
34
        :return: bool(the string is allowed)
52
657
35
        """
53
def validate_query(string):
658
36
        regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')
54
    """
659
37
        return bool(regex.match(input_))
55
    Checks whether we can allow a string to be passed into a Cypher query.
660
38
56
    :param string: the string we wish to validate
661
39
    @staticmethod
57
    :return: bool(the string is allowed)
662
40
    def sanitise(input_):
58
    """
663
41
        """
59
    regex = re.compile(r'^[A-Za-z0-9\-:\.\$_@\*\(\)%\+,]+$')
664
42
        Strips harmful characters from the given string.
60
    return bool(regex.match(string))
665
43
        :param input_: string to sanitise
61
666
44
        :return: the sanitised string
62
667
45
        """
63
class DBProgram(object):
668
46
        return re.sub(r'[^\.\-_a-zA-Z0-9]+', '', input_)
64
    """
669
47
65
    Representation of a program in the database.
670
48
66
671
49
class AddToDatabase():
67
    Provides add_function and add_call methods which locally register functions
672
50
    """Updates the database, adding functions/calls to a given program"""
68
    and calls. The commit method uploads everything to the database.
673
51
69
674
52
    def __init__(self, program_name='', sextant_connection=None,
70
    Attributes:
675
53
                 uploader='', uploader_id='', date=None):
71
        uploader, uploader_id, program_name, date:
676
54
        """
72
            As in __init__.
677
55
        Object which can be used to add functions and calls to a new program
73
        
678
56
        :param program_name: the name of the new program to be created
74
        _conn:
679
57
          (must already be validated against Validator)
75
            The SextantConnection object managing the database connection.
680
58
        :param sextant_connection: the SextantConnection to use for connections
76
        _ssh:
681
59
        :param uploader: string identifier of user who is uploading
77
            The SSHManager object belonging to the SextantConnection.
682
60
        :param uploader_id: string Unix user-id of logged-in user
78
        _db:
683
61
        :param date: string date of today
79
            The database object belonging to the SextantConnection.
684
62
        """
80
685
63
        # program_name must be alphanumeric, to avoid injection attacks easily
81
        _tmp_dir:
686
64
        if not Validator.validate(program_name):
82
            The user-specific location of the local temporary directory.
687
65
            return
83
688
84
        func_writer:
689
85
            A CSVWriter object which manages the csv files containing the
690
86
            list of functions in the program.
691
87
        call_writer:
692
88
            A CSVWriter object which manages the csv files containing the
693
89
            list of function calls in the program.
694
90
695
91
        add_func_query:
696
92
            A string for the cypher query used to create functions from a csv
697
93
            file.
698
94
        add_call_query:
699
95
            A string for the cypher query used to create function calls from
700
96
            a csv file.
701
97
        add_program_query:
702
98
            A string for the cypher query used to create the program node.
703
99
    """
704
100
705
101
    def __init__(self, connection, program_name, uploader, uploader_id, date):
706
102
        """
707
103
        Initialise the database program.
708
104
709
105
        A local temporary folder is created at 'TMP_DIR-<user_name>'. 
710
106
        When functions or calls are added via the add_function/call methods, 
711
107
        they are registered in csv files which are stored in this directory.
712
108
713
109
        Committing the program copies these files to the neo4j server and
714
110
        cleans the local tmp folder.
715
111
716
112
        Raises:
717
113
            ValueError:
718
114
                If the program_name is not alphanumeric.
719
115
            CommandError:
720
116
                If the command to create the temporary directory failed.
721
117
722
118
        Arguments:
723
119
            connection:
724
120
                The SextantConnection object which manages the connection to
725
121
                the database.
726
122
            program_name:
727
123
                The name to register the program under in the database. Must be
728
124
                alphanumeric.
729
125
            uploader:
730
126
                The name of the user who uploaded the program.
731
127
            uploader_id:
732
128
                A numeric id of the user who uploaded the program.
733
129
            date:
734
130
                A string representing the upload date.
735
131
        """
736
132
        # Ensure an alphanumeric program name.
737
133
        if not validate_query(program_name):
738
134
            raise ValueError('program name must be alphanumeric, got: {}'
739
135
                             .format(program_name));
740
136
741
137
        self.uploader = uploader
742
138
        self.uploader_id = uploader_id
743
66
139
744
67
        self.program_name = program_name
140
        self.program_name = program_name
863
68
        self.parent_database_connection = sextant_connection
141
        self.date = date
864
69
        self._functions = {}
142
        
865
70
        self._funcs_tx = None  # transaction for uploading functions
143
        self._conn = connection
866
71
        self._calls_tx = None  # transaction for uploading relationships
144
        self._ssh = connection._ssh
867
72
145
        self._db = connection._db
868
73
        if self.parent_database_connection:
146
        
869
74
            # we'll locally use db for short
147
        self._tmp_dir = '{}-{}'.format(TMP_DIR, getpass.getuser())
870
75
            db = self.parent_database_connection._db
148
871
76
149
        # Make the local tmp directory - csv files will be written into it.
872
77
            parent_function = db.nodes.create(name=program_name,
150
        try:
873
78
                                              type='program',
151
            os.makedirs(self._tmp_dir)
874
79
                                              uploader=uploader,
152
        except OSError as e:
875
80
                                              uploader_id=uploader_id,
153
            if e.errno == os.errno.EEXIST: # File already exists.
876
81
                                              date=date)
154
                pass
877
82
            self._parent_id = parent_function.id
155
            else:
878
83
156
                raise e
879
84
            self._funcs_tx = db.transaction(using_globals=False, for_query=True)
157
880
85
            self._calls_tx = db.transaction(using_globals=False, for_query=True)
158
        
881
86
159
        tmp_path = os.path.join(self._tmp_dir, '{}_{{}}'.format(program_name))
882
87
        self._connections = []
160
883
88
161
        self.func_writer = CSVWriter(tmp_path.format('funcs'), 
884
89
    @staticmethod
162
                                     headers=['name', 'type'], 
885
90
    def _get_display_name(function_name):
163
                                     max_rows=5000)
886
91
        """
164
        self.call_writer = CSVWriter(tmp_path.format('calls'), 
887
92
        Gets the name we will display to the user for this function name.
165
                                     headers=['caller', 'callee'], 
888
93
166
                                     max_rows=5000)
889
94
        For instance, if function_name were __libc_start_main@plt, we would
167
890
95
        return ("__libc_start_main", "plt_stub"). The returned function type is
168
        # Define the queries we use to upload the functions and calls.
891
96
        currently one of "plt_stub", "function_pointer" or "normal".
169
        self.add_func_query = (' USING PERIODIC COMMIT 250'
892
97
170
                 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
893
98
        :param function_name: the name straight from objdump of a function
171
                 ' WITH line, toInt(line.id) as lineid'
894
99
        :return: ("display name", "function type")
172
                 ' MATCH (n:program {{name: "{}"}})'
895
100
173
                 ' CREATE (n)-[:subject]->(m:func {{name: line.name,'
896
101
        """
174
                 ' id: lineid, type: line.type}})')
897
102
175
        
898
103
        if function_name[-4:] == "@plt":
176
        self.add_call_query = (' USING PERIODIC COMMIT 250'
899
104
            display_name = function_name[:-4]
177
                 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
900
105
            function_group = "plt_stub"
178
                 ' MATCH (p:program {{name: "{}"}})'
901
106
        elif function_name[:20] == "_._function_pointer_":
179
                 ' MATCH (p)-[:subject]->(n:func {{name: line.caller}})'
902
107
            display_name = function_name
180
                 ' USING INDEX n:func(name)'
903
108
            function_group = "function_pointer"
181
                 ' MATCH (p)-[:subject]->(m:func {{name: line.callee}})' 
904
109
        else:
182
                 ' USING INDEX m:func(name)'
905
110
            display_name = function_name
183
                 ' CREATE (n)-[r:calls]->(m)')
906
111
            function_group = "normal"
184
907
112
185
        self.add_program_query = ('CREATE (p:program {{name: "{}", uploader: "{}", '
908
113
        return display_name, function_group
186
                ' uploader_id: "{}", date: "{}",'
909
114
187
                ' function_count: {}, call_count: {}}})')
910
115
    def add_function(self, function_name):
188
911
116
        """
189
912
117
        Adds a function to the program, ready to be sent to the remote database.
190
    def __enter__(self):
913
118
        If the function name is already in use, this method effectively does
191
        """
914
119
          nothing and returns True.
192
        Allow DBProgram to be used as a context manager.
915
120
193
        """
916
121
        :param function_name: a string which must be alphanumeric
194
        return self
917
122
        :return: True if the request succeeded, False otherwise
195
918
123
        """
196
    def __exit__(self, etype, evalue, etrace):
919
124
        if not Validator.validate(function_name):
197
        """
920
125
            return False
198
        Make sure that all files are properly closed.
921
126
        if self.class_contains_function(function_name):
199
        """
922
127
            return True
200
        self.func_writer.finish()
923
128
201
        self.call_writer.finish()
924
129
        display_name, function_group = self._get_display_name(function_name)
202
925
130
203
        # Propagate the error if there is one.
926
131
        query = ('START n = node({}) '
204
        return False if etype is not None else True
927
132
                 'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}}) '
205
928
133
                 'RETURN m.name, id(m)')
206
    def add_function(self, name, typ='normal'):
929
134
        query = query.format(self._parent_id, function_group, display_name)
207
        """
930
135
208
        Add a function.
931
136
        self._funcs_tx.append(query)
209
932
137
210
        Arguments:
933
138
        self._functions[function_name] = function_name
211
            name:
934
139
212
                The name of the function.
935
140
        return True
213
            typ:
936
141
214
                The type of the function, may be any string, but standard types
937
142
    def class_contains_function(self, function_to_find):
215
                are:
938
143
        """
216
                    normal: we have the disassembly for this function
939
144
        Checks whether we contain a function with a given name.
217
                    stub:   we have the name but not the disassembly - usually
940
145
        :param function_to_find: string name of the function we wish to look up
218
                            an imported library function.
941
146
        :return: bool(the function exists in this AddToDatabase)
219
                    pointer: we know only that the function exists, not its
942
147
        """
220
                            name or details.
943
148
        return function_to_find in self._functions
221
        """
944
149
222
        self.func_writer.write(name, typ)
945
150
    def class_contains_call(self, function_calling, function_called):
223
946
151
        """
224
    def add_call(self, caller, callee):
947
152
        Checks whether we contain a call between the two named functions.
225
        """
948
153
        :param function_calling: string name of the calling-function
226
        Add a function call.
949
154
        :param function_called: string name of the called function
227
950
155
        :return: bool(function_calling calls function_called in us)
228
        Arguments:
951
156
        """
229
            caller:
952
157
        return (function_calling, function_called) in self._connections
230
                The name of the function making the call.
953
158
231
            callee:
954
159
    def add_function_call(self, fn_calling, fn_called):
232
                The name of the function called.
955
160
        """
233
        """
956
161
        Adds a function call to the program, ready to be sent to the database.
234
        self.call_writer.write(caller, callee)
957
162
        Effectively does nothing if there is already a function call between
235
958
163
          these two functions.
236
959
164
        Function names must be alphanumeric for easy security purposes;
237
    def _copy_local_to_remote_tmp_dir(self):
960
165
          returns False if they fail validation.
238
        """
961
166
        :param fn_calling: the name of the calling-function as a string.
239
        Move local tmp files to the server ready for upload.
962
167
          It should already exist in the AddToDatabase; if it does not,
240
963
168
          this method will create a stub for it.
241
        Return a tuple of iterators, the first over the paths on the remote
964
169
        :param fn_called: name of the function called by fn_calling.
242
        machine of the function files, and the second over the paths of the
965
170
          If it does not exist, we create a stub representation for it.
243
        call files.
966
171
        :return: True if successful, False otherwise
244
        """
967
172
        """
245
        print('Sending files to remote server...', end='')
968
173
        if not all((Validator.validate(fn_calling),
246
        stdout.flush()
969
174
                    Validator.validate(fn_called))):
247
        remote_funcs = self._ssh.send_to_tmp_dir(self.func_writer.file_iter())
970
175
            return False
248
        remote_calls = self._ssh.send_to_tmp_dir(self.call_writer.file_iter())
971
176
249
        print('finished.')
972
177
        if not self.class_contains_function(fn_called):
250
        return remote_funcs, remote_calls
973
178
            self.add_function(fn_called)
251
974
179
        if not self.class_contains_function(fn_calling):
252
    def _clean_tmp_files(self, remote_paths):
975
180
            self.add_function(fn_calling)
253
        """
976
181
254
        Delete temporary files on the local and remote machine.
977
182
        if not self.class_contains_call(fn_calling, fn_called):
255
978
183
            self._connections.append((fn_calling, fn_called))
256
        Arguments:
979
184
257
            remote_paths:
980
185
        return True
258
                A list of the paths of the remote files.
981
259
        """
982
260
        print('Cleaning temporary files...', end='')
983
261
        file_paths = list(itertools.chain(self.func_writer.file_iter(),
984
262
                                          self.call_writer.file_iter()))
985
263
986
264
        for path in file_paths: 
987
265
            os.remove(path)
988
266
989
267
        os.rmdir(self._tmp_dir)
990
268
991
269
        try:
992
270
            # If the parent sextant temp folder is empty, remove it.
993
271
            os.rmdir(TMP_DIR)
994
272
        except:
995
273
            # There is other stuff in TMP_DIR (i.e. from other users), so
996
274
            # leave it.
997
275
            pass
998
276
999
277
        self._ssh.remove_from_tmp_dir(remote_paths)
1000
278
1001
279
        print('done.')
1002
280
1003
281
    def _create_db_constraints(self):
        """
        Set up the schema that the upload relies on.

        Two Cypher statements are queued on a single query transaction,
        which is then committed:
            - a uniqueness constraint on program names (the constraint
              also guarantees that no two programs share a name);
            - an index on function names.
        """
        schema_statements = (
            'CREATE CONSTRAINT ON (p:program) ASSERT p.name IS UNIQUE',
            'CREATE INDEX ON :func(name)',
        )

        # One transaction object carries both statements to the database.
        schema_tx = self._db.transaction(using_globals=False, for_query=True)
        for statement in schema_statements:
            schema_tx.append(statement)

        schema_tx.commit()
1018
186
296
1019
187
    def commit(self):
297
    def commit(self):
1020
188
        """
298
        """
1023
189
        Call this when you are finished with the object.
299
        Insert the program into the database.
1024
190
        Changes are not synced to the remote database until this is called.
300
1025
301
        Move the local temp files created by our func_writer and call_writer
1026
302
        to the database server's temp directory. From there use cypher queries
1027
303
        to upload them into the database, before cleaning them up.
1028
191
        """
304
        """
1047
192
        functions = self._funcs_tx.commit()  # send off the function names
305
        # Ensure that the most recent files are flushed and closed.
1048
193
306
        func_file_count, func_line_count = self.func_writer.finish()
1049
194
        # now functions is a list of QuerySequence objects, which each have a
307
        call_file_count, call_line_count = self.call_writer.finish()
1050
195
        # .elements property which produces [['name', id]]
308
1051
196
309
        # Account for the header line at the top of each file.
1052
197
        id_funcs = dict([seq.elements[0] for seq in functions])
310
        func_count = func_line_count - func_file_count
1053
198
        logging.info('Functions uploaded. Uploading calls...')
311
        call_count = call_line_count - call_file_count
1054
199
312
        
1055
200
        # so id_funcs is a dict with id_funcs['name'] == id
313
        # Get the remote path names as iterators, then make lists of them
1056
201
        for call in self._connections:
314
        # so that we can iterate over them more than once.
1057
202
            query = ('MATCH n WHERE id(n) = {} '
315
        remote_f_iter, remote_c_iter = self._copy_local_to_remote_tmp_dir()
1058
203
                     'MATCH m WHERE id(m) = {} '
316
        remote_funcs, remote_calls = map(list, (remote_f_iter, remote_c_iter))
1059
204
                     'CREATE (n)-[:calls]->(m)')
317
1060
205
            query = query.format(id_funcs[self._get_display_name(call[0])[0]],
318
        # Create the indexes and constraints in the database.
1061
206
                                 id_funcs[self._get_display_name(call[1])[0]])
319
        self._create_db_constraints()
1062
207
            self._calls_tx.append(query)
320
1063
208
321
1064
209
        self._calls_tx.commit()
322
        try:
1065
323
            tx = self._db.transaction(using_globals=False, for_query=True)
1066
324
1067
325
            # Create the program node in the database.
1068
326
            tx.append(self.add_program_query.format(self.program_name, self.uploader,
1069
327
                                                    self.uploader_id, self.date,
1070
328
                                                    func_count, call_count))
1071
329
            tx.commit()
1072
330
1073
331
            # Upload the functions, then the calls.
1074
332
            for files, query, descr in zip((remote_funcs, remote_calls),
1075
333
                                           (self.add_func_query, self.add_call_query),
1076
334
                                           ('funcs', 'calls')):
1077
335
                start = time()
1078
336
                for i, path in enumerate(files):
1079
337
                    completed = int(100*float(i+1)/len(files))
1080
338
1081
339
                    print('\rUploading {}: {}%'.format(descr, completed), end='')
1082
340
                    stdout.flush()
1083
341
1084
342
                    tx.append(query.format(path, self.program_name))
1085
343
                    tx.commit()
1086
344
                end = time()
1087
345
                print(' done.')
1088
346
1089
347
        finally:
1090
348
            # Cleanup temporary folders
1091
349
            self._clean_tmp_files(remote_funcs + remote_calls)
1092
210
350
1093
211
351
1094
212
class FunctionQueryResult:
352
class FunctionQueryResult:
1095
@@ -219,7 +359,7 @@
1096
219
        self._update_common_functions()
359
        self._update_common_functions()
1097
220
360
1098
221
    def __eq__(self, other):
361
    def __eq__(self, other):
1100
222
        # we make a dictionary so that we can perform easy comparison
362
        # We make a dictionary so that we can perform easy comparison.
1101
223
        selfdict = {func.name: func for func in self.functions}
363
        selfdict = {func.name: func for func in self.functions}
1102
224
        otherdict = {func.name: func for func in other.functions}
364
        otherdict = {func.name: func for func in other.functions}
1103
225
365
1104
@@ -243,20 +383,20 @@
1105
243
        if rest_output is None or not rest_output.elements:
383
        if rest_output is None or not rest_output.elements:
1106
244
            return []
384
            return []
1107
245
385
1109
246
        # how we store this is: a dict
386
        # How we store this is: a dict
1110
247
        #   with keys  'functionname'
387
        #   with keys  'functionname'
1111
248
        #   and values [the function object we will use,
388
        #   and values [the function object we will use,
1112
249
        #               and a set of (function names this function calls),
389
        #               and a set of (function names this function calls),
1114
250
        #               and numeric ID of this node in the Neo4J database]
390
        #               and numeric ID of this node in the Neo4J database].
1115
251
391
1116
252
        result = {}
392
        result = {}
1117
253
393
1119
254
        # initial pass for names of functions
394
        # Initial pass for names of functions.
1120
255
395
1122
256
        # if the following assertion failed, we've probably called db.query
396
        # If the following assertion failed, we've probably called db.query
1123
257
        # to get it to not return client.Node objects, which is wrong.
397
        # to get it to not return client.Node objects, which is wrong.
1124
258
        # we attempt to handle this a bit later; this should never arise, but
398
        # we attempt to handle this a bit later; this should never arise, but
1126
259
        # we can cope with it happening in some cases, like the test suite
399
        # we can cope with it happening in some cases, like the test suite.
1127
260
400
1128
261
        if type(rest_output.elements) is not list:
401
        if type(rest_output.elements) is not list:
1129
262
            logging.warning('Not a list: {}'.format(type(rest_output.elements)))
402
            logging.warning('Not a list: {}'.format(type(rest_output.elements)))
1130
@@ -264,11 +404,12 @@
1131
264
        for node_list in rest_output.elements:
404
        for node_list in rest_output.elements:
1132
265
            assert(isinstance(node_list, list))
405
            assert(isinstance(node_list, list))
1133
266
            for node in node_list:
406
            for node in node_list:
1135
267
                if isinstance(node, client.Node):
407
                if isinstance(node, neo4jrestclient.Node):
1136
268
                    name = node.properties['name']
408
                    name = node.properties['name']
1137
269
                    node_id = node.id
409
                    node_id = node.id
1138
270
                    node_type = node.properties['type']
410
                    node_type = node.properties['type']
1140
271
                else:  # this is the handling we mentioned earlier;
411
                else:  
1141
412
                    # This is the handling we mentioned earlier;
1142
272
                    # we are a dictionary instead of a list, as for some
413
                    # we are a dictionary instead of a list, as for some
1143
273
                    # reason we've returned Raw rather than Node data.
414
                    # reason we've returned Raw rather than Node data.
1144
274
                    # We should never reach this code, but just in case.
415
                    # We should never reach this code, but just in case.
1145
@@ -283,7 +424,7 @@
1146
283
                                set(),
424
                                set(),
1147
284
                                node_id]
425
                                node_id]
1148
285
426
1150
286
        # end initialisation of names-dictionary
427
        # End initialisation of names-dictionary.
1151
287
428
1152
288
        if self._parent_db_connection is not None:
429
        if self._parent_db_connection is not None:
1153
289
            # This is the normal case, of extracting results from a server.
430
            # This is the normal case, of extracting results from a server.
1154
@@ -301,7 +442,7 @@
1155
301
            logging.debug('exec')
442
            logging.debug('exec')
1156
302
            results = new_tx.execute()
443
            results = new_tx.execute()
1157
303
444
1159
304
            # results is a list of query results, each of those being a list of
445
            # Results is a list of query results, each of those being a list of
1160
305
            # calls.
446
            # calls.
1161
306
447
1162
307
            for call_list in results:
448
            for call_list in results:
1163
@@ -315,7 +456,7 @@
1164
315
                    # recall: set union is denoted by |
456
                    # recall: set union is denoted by |
1165
316
457
1166
317
        else:
458
        else:
1168
318
            # we don't have a parent database connection.
459
            # We don't have a parent database connection.
1169
319
            # This has probably arisen because we created this object from a
460
            # This has probably arisen because we created this object from a
1170
320
            # test suite, or something like that.
461
            # test suite, or something like that.
1171
321
            for node in rest_output.elements:
462
            for node in rest_output.elements:
1172
@@ -353,19 +494,10 @@
1173
353
        func_list = [func for func in self.functions if func.name == name]
494
        func_list = [func for func in self.functions if func.name == name]
1174
354
        return None if len(func_list) == 0 else func_list[0]
495
        return None if len(func_list) == 0 else func_list[0]
1175
355
496
1176
356
1177
357
def set_common_cutoff(common_def):
    """
    Change the threshold used to decide whether a function is 'common'.

    The module-level COMMON_CUTOFF is overwritten with the given value;
    it keeps its default of 10 if this function is never called.

    :param common_def: number of incoming connections
    """
    global COMMON_CUTOFF
    COMMON_CUTOFF = common_def
1185
365
1186
366
1187
367
class Function(object):
497
class Function(object):
1189
368
    """Represents a function which might appear in a FunctionQueryResult."""
498
    """
1190
499
    Represents a function which might appear in a FunctionQueryResult.
1191
500
    """
1192
369
501
1193
370
    def __eq__(self, other):
502
    def __eq__(self, other):
1194
371
        funcs_i_call_list = {func.name for func in self.functions_i_call}
503
        funcs_i_call_list = {func.name for func in self.functions_i_call}
1195
@@ -393,11 +525,11 @@
1196
393
        self.name = function_name
525
        self.name = function_name
1197
394
        self.is_common = False
526
        self.is_common = False
1198
395
        self._number_calling_me = 0
527
        self._number_calling_me = 0
1200
396
        # care: _number_calling_me is not automatically updated, except by
528
        # Care: _number_calling_me is not automatically updated, except by
1201
397
        # any invocation of FunctionQueryResult._update_common_functions.
529
        # any invocation of FunctionQueryResult._update_common_functions.
1202
398
530
1203
399
531
1205
400
class SextantConnection:
532
class SextantConnection(object):
1206
401
    """
533
    """
1207
402
    RESTful connection to a remote database.
534
    RESTful connection to a remote database.
1208
403
    It can be used to create/delete/query programs.
535
    It can be used to create/delete/query programs.
1209
@@ -406,56 +538,214 @@
1210
406
    ProgramWithMetadata = namedtuple('ProgramWithMetadata',
538
    ProgramWithMetadata = namedtuple('ProgramWithMetadata',
1211
407
                                     ['uploader', 'uploader_id',
539
                                     ['uploader', 'uploader_id',
1212
408
                                      'program_name', 'date', 
540
                                      'program_name', 'date', 
1220
409
                                      'number_of_funcs'])
541
                                      'number_of_funcs', 'number_of_calls'])
1221
410
542
1222
411
    def __init__(self, url):
543
    @staticmethod
1223
412
        self.url = url
544
    def _is_localhost(host, port):
1224
413
        self._db = GraphDatabase(url)
545
        """
1225
414
546
        Checks whether a host is an alias to localhost.
1226
415
    def new_program(self, name_of_program):
547
1227
548
        Raises socket.gaierror if the host was not found.
1228
549
        """
1229
550
        addr = socket.getaddrinfo(host, port)[0][4][0]
1230
551
        return addr in ('127.0.0.1', '::1')
1231
552
1232
553
    @staticmethod
1233
554
    def _is_port_used(port):
1234
555
        """
1235
556
        Checks with the OS to see whether a port is open.
1236
557
1237
558
        Beware: port is passed directly to the shell. Make sure it is an integer.
1238
559
        We raise ValueError if it is not.
1239
560
        :param port: integer port to check for openness
1240
561
        :return: bool(port is in use)
1241
562
        """
1242
563
        result = False
1243
564
1244
565
        # We follow:
1245
566
        # http://stackoverflow.com/questions/2838244/get-open-tcp-port-in-python
1246
567
        if not (isinstance(port, int) and port > 0):
1247
568
            raise ValueError('port {} must be a positive integer.'.format(port))
1248
569
1249
570
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1250
571
        try:
1251
572
            sock.bind(('127.0.0.1', port))
1252
573
        except socket.error as e:
1253
574
            if e.errno == os.errno.EADDRINUSE:
1254
575
                result = True
1255
576
            else:
1256
577
                raise
1257
578
1258
579
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1259
580
1260
581
        return result  # that is, the port is not used
1261
582
1262
583
    @staticmethod
1263
584
    def _get_unused_port():
1264
585
        """
1265
586
        Returns a port number between 10000 and 50000 which is not currently open.
1266
587
        """
1267
588
1268
589
        keep_going = True
1269
590
        while keep_going:
1270
591
            portnum = random.randint(10000, 50000)
1271
592
            keep_going = SextantConnection._is_port_used(portnum)
1272
593
        return portnum
1273
594
1274
595
1275
596
    def __enter__(self):
1276
597
        return self
1277
598
1278
599
    def __exit__(self, etype, evalue, etrace):
1279
600
        self.close()
1280
601
        return False if etype is not None else True
1281
602
1282
603
1283
604
    def __init__(self, remotehost, remoteport, no_ssh_tunnel=False):
1284
605
        """
1285
606
        Initialise the database and ssh connections.
1286
607
1287
608
        Arguments:
1288
609
            remotehost:
1289
610
                The remote host name to connect to.
1290
611
            remoteport:
1291
612
                The port number to connect to on the remote host.
1292
613
            no_ssh_tunnel:
1293
614
                Disables the SSHManager if True. Prevents program upload.
1294
615
        """
1295
616
1296
617
        self.remote_host = remotehost
1297
618
        self.remote_port = remoteport
1298
619
1299
620
1300
621
        self._no_ssh_tunnel = no_ssh_tunnel
1301
622
        self._ssh = None
1302
623
        self._db = None
1303
624
1304
625
        self.open()
1305
626
1306
627
    def open(self):
        """
        Create the SSHManager and the database client for this connection.

        Raises SSHConnectionError if ssh tunnelling is disabled
        (self._no_ssh_tunnel) and the remote host is not localhost.
        socket.gaierror from the localhost check is not caught here.
        """
        local_port = SextantConnection._get_unused_port()
        is_localhost = SextantConnection._is_localhost(self.remote_host,
                                                       self.remote_port)

        # Without a tunnel, only a database on localhost is reachable.
        if self._no_ssh_tunnel and not is_localhost:
            raise SSHConnectionError('Cannot connect to the remote database '
                                     'without an ssh connection.')

        # Either we are making an ssh tunnel or we are contacting localhost.
        self._ssh = SSHManager(local_port,
                               self.remote_host,
                               self.remote_port,
                               is_localhost=is_localhost)

        # The client always talks to localhost: on the database's own port
        # when it is local, otherwise on the local port given to SSHManager.
        if is_localhost:
            port = self.remote_port
        else:
            port = local_port

        self._db = neo4jrestclient.GraphDatabase(
            'http://localhost:{}'.format(port))
1324
645
 
1325
646
    def close(self):
1326
647
        """
1327
648
        Close the ssh connection to clean up its resources.
1328
649
        """
1329
650
        if self._ssh:
1330
651
            self._ssh.close()
1331
652
1332
653
    def new_program(self, program_name):
1333
416
        """
654
        """
1334
417
        Request that the remote database create a new program with the given name.
655
        Request that the remote database create a new program with the given name.
1335
418
        This procedure will create a new program remotely; you can manipulate
656
        This procedure will create a new program remotely; you can manipulate
1337
419
          that program using the returned AddToDatabase object.
657
          that program using the returned DBProgram object.
1338
420
        The name can appear in the database already, but this is not recommended
658
        The name can appear in the database already, but this is not recommended
1339
421
          because then delete_program will not know which to delete. Check first
659
          because then delete_program will not know which to delete. Check first
1340
422
          using self.check_program_exists.
660
          using self.check_program_exists.
1342
423
        The name specified must pass Validator.validate()ion; this is a measure
661
        The name specified must pass validate_query()ion; this is a measure
1343
424
          to prevent Cypher injection attacks.
662
          to prevent Cypher injection attacks.
1346
425
        :param name_of_program: string program name
663
        :param program_name: string program name
1347
426
        :return: AddToDatabase instance if successful
664
        :return: DBProgram instance if successful
1348
427
        """
665
        """
1349
428
666
1353
429
        if not Validator.validate(name_of_program):
667
        if not validate_query(program_name):
1354
430
            raise ValueError(
668
            raise ValueError("{} is not a valid program name"
1355
431
                "{} is not a valid program name".format(name_of_program))
669
                             .format(program_name))
1356
432
        
670
        
1357
433
        uploader = getpass.getuser()
671
        uploader = getpass.getuser()
1358
434
        uploader_id = os.getuid()
672
        uploader_id = os.getuid()
1366
435
673
        timestr = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
1367
436
        return AddToDatabase(sextant_connection=self,
674
1368
437
                             program_name=name_of_program,
675
        return DBProgram(self, program_name, uploader, 
1369
438
                             uploader=uploader, uploader_id=uploader_id,
676
                         uploader_id, date=timestr)
1370
439
                             date=str(datetime.now()))
677
1371
440
678
    def delete_program(self, program_name):
1365
441
    def delete_program(self, name_of_program):
1372
442
        """
679
        """
1373
443
        Request that the remote database delete a specified program.
680
        Request that the remote database delete a specified program.
1375
444
        :param name_of_program: a string which must be alphanumeric only
681
        :param program_name: a string which must be alphanumeric only
1376
445
        :return: bool(request succeeded)
682
        :return: bool(request succeeded)
1377
446
        """
683
        """
1386
447
        if not Validator.validate(name_of_program):
684
        if not program_name in self.get_program_names():
1387
448
            return False
685
            print('No program `{}` in the database'.format(program_name))
1388
449
686
            return True
1389
450
        q = """MATCH (n) WHERE n.name= "{}" AND n.type="program"
687
        else:
1390
451
        OPTIONAL MATCH (n)-[r]-(b) OPTIONAL MATCH (b)-[rel]-()
688
            print('Deleting `{}` from the database. '
1391
452
        DELETE  b,rel DELETE n, r""".format(name_of_program)
689
                  'This may take some time for larger programs.'
1392
453
690
                  .format(program_name))
1393
454
        self._db.query(q)
691
1394
692
        start = time()
1395
693
        tx = self._db.transaction(using_globals=False, for_query=True)
1396
694
1397
695
        count_query = (' MATCH (p:program {{name: "{}"}})'
1398
696
                       ' RETURN p.function_count, p.call_count'
1399
697
                       .format(program_name))
1400
698
1401
699
        tx.append(count_query)
1402
700
        func_count, call_count = tx.commit()[0].elements[0]
1403
701
1404
702
        del_call_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
1405
703
                          '-[:subject]->(f:func)-[c:calls]->()'
1406
704
                          ' WITH c LIMIT 5000 DELETE c RETURN count(distinct(c))'
1407
705
                          .format(program_name))
1408
706
1409
707
        del_func_query = ('OPTIONAL MATCH (p:program {{name: "{}"}})'
1410
708
                          '-[s:subject]->(f:func)'
1411
709
                          ' WITH s, f LIMIT 5000 DELETE s, f RETURN count(f)'
1412
710
                          .format(program_name))
1413
711
1414
712
        del_prog_query = ('MATCH (p:program {{name: "{}"}}) DELETE p'
1415
713
                          .format(program_name))
1416
714
1417
715
        # Delete calls first, a node may not be deleted until all relationships
1418
716
        # referencing it are deleted.
1419
717
        for count, query, descr in zip((call_count, func_count),
1420
718
                                       (del_call_query, del_func_query),
1421
719
                                       ('calls', 'funcs')): 
1422
720
            # Change tracks whether the last delete did anything. We would
1423
721
            # like to use: while done < count: ..., but if the program has
1424
722
            # already been partially deleted then this will never terminate.
1425
723
            # Furthermore, if there are no functions or no calls, the while
1426
724
            # loop will be appropriately skipped.
1427
725
            change = count
1428
726
            done = 0
1429
727
            while change:
1430
728
                completed = int(100 * float(done)/count)
1431
729
                print('\rDeleting {}: {}%'.format(descr, completed), end='')
1432
730
                stdout.flush()
1433
731
1434
732
                tx.append(query)
1435
733
                change = tx.commit()[0].elements[0][0]
1436
734
                done += change
1437
735
            if done:
1438
736
                print(' done.')
1439
737
1440
738
        # Delete the program node.
1441
739
        tx.append(del_prog_query)
1442
740
        tx.commit()
1443
741
1444
742
        end = time()
1445
743
        print('Finished in {:.2f}s.'.format(end - start))
1446
455
744
1447
456
        return True
745
        return True
1448
457
746
1450
458
    def _execute_query(self, prog_name='', query=''):
747
1451
748
    def _execute_query(self, prog_name, query):
1452
459
        """
749
        """
1453
460
        Executes a Cypher query against the remote database.
750
        Executes a Cypher query against the remote database.
1454
461
        Note that this returns a FunctionQueryResult, so is unsuitable for any
751
        Note that this returns a FunctionQueryResult, so is unsuitable for any
1455
@@ -468,7 +758,7 @@
1456
468
        :param query: verbatim query we wish the server to execute
758
        :param query: verbatim query we wish the server to execute
1457
469
        :return: a FunctionQueryResult corresponding to the server's output
759
        :return: a FunctionQueryResult corresponding to the server's output
1458
470
        """
760
        """
1460
471
        rest_output = self._db.query(query, returns=client.Node)
761
        rest_output = self._db.query(query, returns=neo4jrestclient.Node)
1461
472
762
1462
473
        return FunctionQueryResult(parent_db=self._db,
763
        return FunctionQueryResult(parent_db=self._db,
1463
474
                                   program_name=prog_name,
764
                                   program_name=prog_name,
1464
@@ -481,12 +771,11 @@
1465
481
          method which requires a program-name input.
771
          method which requires a program-name input.
1466
482
        :return: a list of function-name strings.
772
        :return: a list of function-name strings.
1467
483
        """
773
        """
1469
484
        q = """MATCH (n) WHERE n.type = "program" RETURN n.name"""
774
        q = 'MATCH (n:program) RETURN n.name'
1470
485
        program_names = self._db.query(q, returns=str).elements
775
        program_names = self._db.query(q, returns=str).elements
1471
486
776
1473
487
        result = [el[0] for el in program_names]
777
        return set(el[0] for el in program_names)
1474
488
778
1475
489
        return set(result)
1476
490
779
1477
491
    def programs_with_metadata(self):
780
    def programs_with_metadata(self):
1478
492
        """
781
        """
1479
@@ -498,27 +787,28 @@
1480
498
       
787
       
1481
499
        """
788
        """
1482
500
        
789
        
1486
501
        q = ("MATCH (base) WHERE base.type = 'program' "
790
        q = (' MATCH (p:program)'
1487
502
             "MATCH (base)-[:subject]->(n)"
791
             ' RETURN p.uploader, p.uploader_id, p.name, p.date,'
1488
503
             "RETURN base.uploader, base.uploader_id, base.name, base.date, count(n)")
792
             ' p.function_count, p.call_count')
1489
504
        result = self._db.query(q)
793
        result = self._db.query(q)
1490
505
        return {self.ProgramWithMetadata(*res) for res in result}
794
        return {self.ProgramWithMetadata(*res) for res in result}
1491
506
795
1492
507
    def check_program_exists(self, program_name):
796
    def check_program_exists(self, program_name):
1493
508
        """
797
        """
1494
509
        Execute query to check whether a program with the given name exists.
798
        Execute query to check whether a program with the given name exists.
1496
510
        Returns False if the program_name fails validation against Validator.
799
        Returns False if the program_name fails validation (i.e. is possibly
1497
800
        unsafe as a string in a cypher query).
1498
511
        :return: bool(the program exists in the database).
801
        :return: bool(the program exists in the database).
1499
512
        """
802
        """
1500
513
803
1502
514
        if not Validator.validate(program_name):
804
        if not validate_query(program_name):
1503
515
            return False
805
            return False
1504
516
806
1507
517
        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
807
        q = ('MATCH (p:program {{name: "{}"}}) RETURN p LIMIT 1'
1508
518
             "RETURN count(base)").format(program_name)
808
             .format(program_name))
1509
519
809
1512
520
        result = self._db.query(q, returns=int)
810
        result = self._db.query(q, returns=neo4jrestclient.Node)
1513
521
        return result.elements[0][0] > 0
811
        return bool(result)
1514
522
812
1515
523
    def check_function_exists(self, program_name, function_name):
813
    def check_function_exists(self, program_name, function_name):
1516
524
        """
814
        """
1517
@@ -529,18 +819,18 @@
1518
529
        :param function_name: string name of the function to check for existence
819
        :param function_name: string name of the function to check for existence
1519
530
        :return: bool(names validate correctly, and function exists in program)
820
        :return: bool(names validate correctly, and function exists in program)
1520
531
        """
821
        """
1533
532
        if not self.check_program_exists(program_name):
822
        if not validate_query(program_name):
1534
533
            return False
823
            return False
1535
534
824
1536
535
        if not Validator.validate(program_name):
825
        pmatch = '(:program {{name: "{}"}})'.format(program_name)
1537
536
            return False
826
        fmatch = '(f:func {{name: "{}"}})'.format(function_name)
1538
537
827
        # be explicit about index usage
1539
538
        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program'"
828
        q = (' MATCH {}-[:subject]->{} USING INDEX f:func(name)'
1540
539
             "MATCH (base)-[r:subject]->(m) WHERE m.name = '{}'"
829
             ' RETURN f LIMIT 1'.format(pmatch, fmatch))
1541
540
             "RETURN count(m)").format(program_name, function_name)
830
1542
541
831
        # result will be an empty list if the function was not found
1543
542
        result = self._db.query(q, returns=int)
832
        result = self._db.query(q, returns=neo4jrestclient.Node)
1544
543
        return result.elements[0][0] > 0
833
        return bool(result)
1545
544
834
1546
545
    def get_function_names(self, program_name):
835
    def get_function_names(self, program_name):
1547
546
        """
836
        """
1548
@@ -552,12 +842,11 @@
1549
552
          a set of function-name strings otherwise.
842
          a set of function-name strings otherwise.
1550
553
        """
843
        """
1551
554
844
1554
555
        if not self.check_program_exists(program_name):
845
        if not validate_query(program_name):
1555
556
            return None
846
            return set()
1556
557
847
1560
558
        q = ("MATCH (base) WHERE base.name = '{}' AND base.type = 'program' "
848
        q = (' MATCH (:program {{name: "{}"}})-[:subject]->(f:func)'
1561
559
             "MATCH (base)-[r:subject]->(m) "
849
             ' RETURN f.name').format(program_name)
1559
560
             "RETURN  m.name").format(program_name)
1562
561
        return {func[0] for func in self._db.query(q)}
850
        return {func[0] for func in self._db.query(q)}
1563
562
851
1564
563
    def get_all_functions_called(self, program_name, function_calling):
852
    def get_all_functions_called(self, program_name, function_calling):
1565
@@ -570,16 +859,13 @@
1566
570
        :return: FunctionQueryResult, maximal subgraph rooted at function_calling
859
        :return: FunctionQueryResult, maximal subgraph rooted at function_calling
1567
571
        """
860
        """
1568
572
861
1569
573
        if not self.check_program_exists(program_name):
1570
574
            return None
1571
575
1572
576
        if not self.check_function_exists(program_name, function_calling):
862
        if not self.check_function_exists(program_name, function_calling):
1573
577
            return None
863
            return None
1574
578
864
1579
579
        q = """MATCH (base) WHERE base.name = '{}' ANd base.type = 'program'
865
        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
1580
580
            MATCH (base)-[:subject]->(m) WHERE m.name='{}'
866
             ' USING INDEX f:func(name)'
1581
581
            MATCH (m)-[:calls*]->(n)
867
             ' MATCH (f)-[:calls*]->(g) RETURN distinct f, g'
1582
582
            RETURN distinct n, m""".format(program_name, function_calling)
868
             .format(program_name, function_calling))
1583
583
869
1584
584
        return self._execute_query(program_name, q)
870
        return self._execute_query(program_name, q)
1585
585
871
1586
@@ -593,16 +879,13 @@
1587
593
        :return: FunctionQueryResult, maximal connected subgraph with leaf function_called
879
        :return: FunctionQueryResult, maximal connected subgraph with leaf function_called
1588
594
        """
880
        """
1589
595
881
1590
596
        if not self.check_program_exists(program_name):
1591
597
            return None
1592
598
1593
599
        if not self.check_function_exists(program_name, function_called):
882
        if not self.check_function_exists(program_name, function_called):
1594
600
            return None
883
            return None
1595
601
884
1600
602
        q = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
885
        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func {{name: "{}"}})'
1601
603
            MATCH (base)-[r:subject]->(m) WHERE m.name='{}'
886
             ' USING INDEX g:func(name)'
1602
604
            MATCH (n)-[:calls*]->(m) WHERE n.name <> '{}'
887
             ' MATCH (f)-[:calls*]->(g) WHERE f.name <> "{}"'
1603
605
            RETURN distinct n , m"""
888
             ' RETURN distinct f , g')
1604
606
        q = q.format(program_name, function_called, program_name)
889
        q = q.format(program_name, function_called, program_name)
1605
607
890
1606
608
        return self._execute_query(program_name, q)
891
        return self._execute_query(program_name, q)
1607
@@ -628,12 +911,14 @@
1608
628
        if not self.check_function_exists(program_name, function_calling):
911
        if not self.check_function_exists(program_name, function_calling):
1609
629
            return None
912
            return None
1610
630
913
1617
631
        q = r"""MATCH (pr) WHERE pr.name = '{}' AND pr.type = 'program'
914
        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(start:func {{name: "{}"}})'
1618
632
                MATCH p=(start {{name: "{}" }})-[:calls*]->(end {{name:"{}"}})
915
             ' USING INDEX start:func(name)'
1619
633
                  WHERE (pr)-[:subject]->(start)
916
             ' MATCH (p)-[:subject]->(end:func {{name: "{}"}})'
1620
634
                WITH DISTINCT nodes(p) AS result
917
             ' USING INDEX end:func(name)'
1621
635
                UNWIND result AS answer
918
             ' MATCH path=(start)-[:calls*]->(end)'
1622
636
                RETURN answer"""
919
             ' WITH DISTINCT nodes(path) AS result'
1623
920
             ' UNWIND result AS answer'
1624
921
             ' RETURN answer')
1625
637
        q = q.format(program_name, function_calling, function_called)
922
        q = q.format(program_name, function_calling, function_called)
1626
638
923
1627
639
        return self._execute_query(program_name, q)
924
        return self._execute_query(program_name, q)
1628
@@ -648,11 +933,9 @@
1629
648
        if not self.check_program_exists(program_name):
933
        if not self.check_program_exists(program_name):
1630
649
            return None
934
            return None
1631
650
935
1637
651
        query = """MATCH (base) WHERE base.name = '{}' AND base.type = 'program'
936
        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func)'
1638
652
                MATCH (base)-[subject:subject]->(m)
937
             ' RETURN (f)'.format(program_name))
1639
653
                RETURN DISTINCT (m)""".format(program_name)
938
        return self._execute_query(program_name, q)
1635
654
1636
655
        return self._execute_query(program_name, query)
1640
656
939
1641
657
    def get_shortest_path_between_functions(self, program_name, func1, func2):
940
    def get_shortest_path_between_functions(self, program_name, func1, func2):
1642
658
        """
941
        """
1643
@@ -671,9 +954,11 @@
1644
671
        if not self.check_function_exists(program_name, func2):
954
        if not self.check_function_exists(program_name, func2):
1645
672
            return None
955
            return None
1646
673
956
1651
674
        q = """MATCH (func1 {{ name:"{}" }}),(func2 {{ name:"{}" }}),
957
        q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})'
1652
675
            p = shortestPath((func1)-[:calls*]->(func2))
958
             ' USING INDEX f:func(name)'
1653
676
            UNWIND nodes(p) AS ans
959
             ' MATCH (p)-[:subject]->(g:func {{name: "{}"}})'
1654
677
            RETURN ans""".format(func1, func2)
960
             ' MATCH path=shortestPath((f)-[:calls*]->(g))'
1655
961
             ' UNWIND nodes(path) AS ans'
1656
962
             ' RETURN ans'.format(program_name, func1, func2))
1657
678
963
1658
679
        return self._execute_query(program_name, q)
964
        return self._execute_query(program_name, q)
1659
680
965
1660
=== modified file 'src/sextant/export.py'
1661
--- src/sextant/export.py	2014-09-04 09:46:18 +0000
1662
+++ src/sextant/export.py	2014-10-23 12:33:12 +0000
1663
@@ -46,7 +46,7 @@
1664
46
        font_name = "Helvetica"
46
        font_name = "Helvetica"
1665
47
47
1666
48
        for func in program.get_functions():
48
        for func in program.get_functions():
1668
49
            if func.type == "plt_stub":
49
            if func.type == "stub":
1669
50
                output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name)
50
                output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name)
1670
51
            elif func.type == "function_pointer":
51
            elif func.type == "function_pointer":
1671
52
                output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name)
52
                output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name)
1672
@@ -108,7 +108,7 @@
1673
108
108
1674
109
        for func in program.get_functions():
109
        for func in program.get_functions():
1675
110
            display_func = ProgramConverter.get_display_name(func)
110
            display_func = ProgramConverter.get_display_name(func)
1677
111
            if func.type == "plt_stub":
111
            if func.type == "stub":
1678
112
                colour = "#ff00ff"
112
                colour = "#ff00ff"
1679
113
            elif func.type == "function_pointer":
113
            elif func.type == "function_pointer":
1680
114
                colour = "#99ffff"
114
                colour = "#99ffff"
1681
@@ -175,4 +175,4 @@
1682
175
                output_str += '<edge source="{}" target="{}"> <data key="calls">1</data> </edge>\n'.format(func.name, callee.name)
175
                output_str += '<edge source="{}" target="{}"> <data key="calls">1</data> </edge>\n'.format(func.name, callee.name)
1683
176
176
1684
177
        output_str += '</graph>\n</graphml>'
177
        output_str += '</graph>\n</graphml>'
1685
178
        return output_str
1686
179
\ No newline at end of file
178
\ No newline at end of file
1687
179
        return output_str
1688
180
180
1689
=== modified file 'src/sextant/objdump_parser.py' (properties changed: -x to +x)
1690
--- src/sextant/objdump_parser.py	2014-08-18 13:00:53 +0000
1691
+++ src/sextant/objdump_parser.py	2014-10-23 12:33:12 +0000
1692
@@ -1,273 +1,313 @@
1702
1
# -----------------------------------------
1
#!/usr/bin/python
1694
2
# Sextant
1695
3
# Copyright 2014, Ensoft Ltd.
1696
4
# Author: Patrick Stevens
1697
5
# -----------------------------------------
1698
6
1699
7
#!/usr/bin/python3
1700
8
1701
9
import re
1703
10
import argparse
2
import argparse
1704
11
import os.path
1705
12
import subprocess
3
import subprocess
1706
13
import logging
4
import logging
1707
14
5
1767
15
6
"""
1768
16
class ParsedObject():
7
Provide a parser class to extract functions and calls from an objdump file,
1769
17
    """
8
and a way to generate such a file from an object file.
1770
18
    Represents a function as parsed from an objdump disassembly.
9
"""
1771
19
    Has a name (which is the verbatim name like '__libc_start_main@plt'),
10
__all__ = ('Parser', 'run_objdump', 'FileNotFoundError')
1772
20
        a position (which is the virtual memory location in hex, like '08048320'
11
1773
21
                    extracted from the dump),
12
1774
22
        and a canonical_position (which is the virtual memory location in hex
13
class FileNotFoundError(Exception):
1775
23
                                  but stripped of leading 0s, so it should be a
14
    """
1776
24
                                  unique id).
15
    Exception raised when Parser fails to open its file.
1777
25
    It also has a list what_do_i_call of ParsedObjects it calls using the
16
    """
1778
26
      assembly keyword 'call'.
17
    pass
1779
27
    It has a list original_code of its assembler code, too, in case it's useful.
18
1780
28
    """
19
1781
29
20
class Parser(object):
1782
30
    @staticmethod
21
    """
1783
31
    def get_canonical_position(position):
22
    Extract functions and calls from an object file or an objdump output file.
1784
32
        return position.lstrip('0')
23
1785
33
24
    Only the specified sections of the disassembled code will be parsed.
1786
34
    def __eq__(self, other):
25
1787
35
        return self.name == other.name
26
    Attributes:
1788
36
27
        path:
1789
37
    def __init__(self, input_lines=None, assembler_section='', function_name='',
28
            Set to file_path in __init__.
1790
38
                 ignore_function_pointers=True, function_pointer_id=None):
29
        _file:
1791
39
        """
30
            Set to file_object in __init__.
1792
40
        Create a new ParsedObject given the definition-lines from objdump -S.
31
        sections:
1793
41
        A sample first definition-line is '08048300 <__gmon_start__@plt>:\n'
32
            Initialised by taking the sections argument to __init__ and
1794
42
         but this method
33
            converting it to a set.
1795
43
         expects to see the entire definition eg
34
        ignore_ptrs:
1796
44
35
            Set to ignore_ptrs in __init__.
1797
45
080482f0 <puts@plt>:
36
1798
46
 80482f0:	ff 25 00 a0 04 08    	jmp    *0x804a000
37
        section_count:
1799
47
 80482f6:	68 00 00 00 00       	push   $0x0
38
            The number of sections that have been parsed.
1800
48
 80482fb:	e9 e0 ff ff ff       	jmp    80482e0 <_init+0x30>
39
        function_count:
1801
49
40
            The number of functions that have been parsed.
1802
50
          We also might expect assembler_section, which is for instance '.init'
41
        call_count:
1803
51
            in 'Disassembly of section .init:'
42
            The number of function calls that have been parsed.
1804
52
          function_name is used if we want to give this function a custom name.
43
        function_ptr_count:
1805
53
          ignore_function_pointers=True will pretend that calls to (eg) *eax do
44
            The number of function pointers that have been detected.
1806
54
            not exist; setting to False makes us create stubs for those calls.
45
        _known_stubs:
1807
55
          function_pointer_id is only used internally; it refers to labelling
46
            A set of the names of functions with type 'stub' that have been
1808
56
            of function pointers if ignore_function_pointers is False. Each
47
            parsed - used to avoid registering a stub multiple times.
1809
57
            stub is given a unique numeric ID: this parameter tells init where
48
1810
58
            to start counting these IDs from.
49
    """
1811
59
50
    def __init__(self, file_path, file_object=None, 
1812
60
        """
51
                 sections=None, ignore_ptrs=False,
1813
61
        if input_lines is None:
52
                 add_function=None, add_call=None, 
1814
62
            # get around Python's inability to pass in empty lists by value
53
                 started=None, finished=None):
1815
63
            input_lines = []
54
        """
1816
64
55
        Initialise the parser object.
1817
65
        self.name = function_name or re.search(r'<.+>', input_lines[0]).group(0).strip('<>')
56
1818
66
        self.what_do_i_call = []
57
        Raises:
1819
67
        self.position = ''
58
            FileNotFoundError:
1820
68
59
                If file_object was not provided and file_path couldn't be
1821
69
        if input_lines:
60
                opened.
1822
70
            self.position = re.search(r'^[0-9a-f]+', input_lines[0]).group(0)
61
1823
71
            self.canonical_position = ParsedObject.get_canonical_position(self.position)
62
        Arguments:
1824
72
            self.assembler_section = assembler_section
63
            file_path:
1825
73
            self.original_code = input_lines[1:]
64
                The path of the objdump output file to parse, or the path of an
1826
65
                object file to run objdump on and then parse.
1827
66
            file_object:
1828
67
                None if file_path is the path to an object file.
1829
68
                OR the file object (providing 'for line in file_object')
1830
69
            sections:
1831
70
                A list of the names of the disassembly sections to parse. An empty
1832
71
                list will result in all sections being parsed.
1833
72
            ignore_ptrs:
1834
73
                If True, calls to function pointers will be ignored during parsing.
1835
74
            add_function:
1836
75
                A function to call when a function is parsed. Takes:
1837
76
                    name: name of the parsed function
1838
77
                    type: type of the parsed function
1839
78
            add_call:
1840
79
                A function to call when a function call is parsed. Takes:
1841
80
                    caller: name of the calling function
1842
81
                    callee: name of the called function
1843
82
            started:
1844
83
                A function to call when the parse begins. Takes:
1845
84
                    parser: the Parser instance which has just begun parsing.
1846
85
            finished:
1847
86
                A function to call when the parse completes. Takes:
1848
87
                    parser: the Parser instance which has just finished parsing.
1849
88
                e.g. if add_function/call have been set to write into files, 
1850
89
                then finished may be set to properly flush and close them.
1851
90
        """
1852
91
        self.path = file_path
1853
92
        try:
1854
93
            self._file = file_object or self._open_file(file_path)
1855
94
        except FileNotFoundError:
1856
95
            raise
1857
96
1858
97
        self.sections = set(sections or [])
1859
98
        self.ignore_ptrs = ignore_ptrs
1860
99
1861
100
        self.section_count = 0
1862
101
        self.function_count = 0
1863
102
        self.call_count = 0
1864
103
        self.function_ptr_count = 0
1865
104
        
1866
105
        # Avoid adding duplicate function stubs (as these are detected from
1867
106
        # function calls so may be repeated).
1868
107
        self._known_stubs = set()
1869
108
1870
109
        # By default print information to stdout.
1871
110
        def print_func(name, typ):
1872
111
            print('func {:25}{}'.format(name, typ))
1873
112
1874
113
        def print_call(caller, callee):
1875
114
            print('call {:25}{:25}'.format(caller, callee))
1876
115
1877
116
        def print_started(parser):
1878
117
            print('parse started: {}[{}]'.format(self.path, ', '.join(self.sections)))
1879
118
1880
119
1881
120
        def print_finished(parser):
1882
121
            print('parsed {} functions and {} calls'.format(self.function_count, self.call_count))
1883
122
1884
123
        self.add_function = add_function or print_func
1885
124
        self.add_call = add_call or print_call
1886
125
        self.started = lambda: (started or print_started)(self)
1887
126
        self.finished = lambda: (finished or print_finished)(self)
1888
127
1889
128
1890
129
    def _get_function_ptr_name(self):
1891
130
        """
1892
131
        Return a name for a new function pointer.
1893
132
        """
1894
133
        name = 'func_ptr_{}'.format(self.function_ptr_count)
1895
134
        self.function_ptr_count += 1
1896
135
        return name
1897
136
1898
137
    def _add_function_normal(self, name):
1899
138
        """
1900
139
        Add a function which we have full assembly code for.
1901
140
        """
1902
141
        self.add_function(name, 'normal')
1903
142
        self.function_count += 1
1904
143
1905
144
    def _add_function_ptr(self, name):
1906
145
        """
1907
146
        Add a function pointer.
1908
147
        """
1909
148
        self.add_function(name, 'pointer')
1910
149
        self.function_count += 1
1911
150
1912
151
    def _add_function_stub(self, name):
1913
152
        """
1914
153
        Add a function stub - we have its name but none of its internals.
1915
154
        """
1916
155
        if not name in self._known_stubs:
1917
156
            self._known_stubs.add(name)
1918
157
            self.add_function(name, 'stub')
1919
158
            self.function_count += 1
1920
159
1921
160
    def _add_call(self, caller, callee):
1922
161
        """
1923
162
        Add a function call from caller to callee.
1924
163
        """
1925
164
        self.add_call(caller, callee)
1926
165
        self.call_count += 1
1927
166
1928
167
    def parse(self):
1929
168
        """
1930
169
        Parse self._file.
1931
170
        """
1932
171
        self.started()
1933
172
1934
173
        if self._file is not None:
1935
174
            in_section = False          # if we are in one of self.sections
1936
175
            current_function = None     # track the caller for function calls
1937
176
1938
177
            for line in self._file:
1939
178
                if line.startswith('Disassembly'):
1940
179
                    # 'Disassembly of section <name>:\n'
1941
180
                    section = line.split(' ')[-1].rstrip(':\n')
1942
181
                    in_section = section in self.sections if self.sections else True
1943
182
                    if in_section:
1944
183
                        self.section_count += 1
1945
184
1946
185
                elif in_section:
1947
186
                    if line.endswith('>:\n'):
1948
187
                        # '<address> <<function_identifier>>:\n'
1949
188
                        # with <function_identifier> of form:
1950
189
                        # <function_name>[@plt]
1951
190
                        function_identifier = line.split('<')[-1].split('>')[0]
1952
191
1953
192
                        if '@' in function_identifier:
1954
193
                            current_function = function_identifier.split('@')[0]
1955
194
                            self._add_function_stub(current_function)
1956
195
                        else:
1957
196
                            current_function = function_identifier
1958
197
                            self._add_function_normal(current_function)
1959
198
1960
199
                    elif 'call ' in line or 'callq ' in line:
1961
200
                        # WHITESPACE to prevent picking up function names 
1962
201
                        # containing 'call'
1963
202
1964
203
                        # '<hex>: <hex> [l]call [hex] <callee_info>\n'
1965
204
                        callee_info = line.split(' ')[-1].rstrip('\n')
1966
205
1967
206
                        # Where <callee_info> is either
1968
207
                        #  1) '*(<register>)'           call to a fn pointer
1969
208
                        #  2) '$<hex>,$<hex>'           lcall to a fn pointer
1970
209
                        #  3) '<<function_identifier>>' call to a named function
1971
210
                        if '<' in callee_info and '>' in callee_info:
1972
211
                            # call to a normal or stub function
1973
212
                            # '<function_identifier>' is of form <name>[@/-/+]<...>
1974
213
                            # from which we extract name
1975
214
                            callee_is_ptr = False
1976
215
                            function_identifier = callee_info.lstrip('<').rstrip('>\n')
1977
216
                            if '@' in function_identifier:
1978
217
                                callee = function_identifier.split('@')[0]
1979
218
                                self._add_function_stub(callee)
1980
219
                            else:
1981
220
                                callee = function_identifier.split('-')[-1].split('+')[0]
1982
221
                                # Do not add this fn now - it is a normal func
1983
222
                                # so we know about it from elsewhere.
1984
223
1985
224
                        else:
1986
225
                            # Some kind of function pointer call.
1987
226
                            callee_is_ptr = True
1988
227
                            if not self.ignore_ptrs:
1989
228
                                callee = self._get_function_ptr_name()
1990
229
                                self._add_function_ptr(callee)
1991
230
1992
231
                        # Add the call.
1993
232
                        if not (self.ignore_ptrs and callee_is_ptr):
1994
233
                            self._add_call(current_function, callee)
1995
74
            
234
            
2031
75
            call_regex_compiled = (ignore_function_pointers and re.compile(r'\tcall. +[^\*]+\n')) or re.compile(r'\tcall. +.+\n')
235
            self.finished()
2032
76
236
2033
77
            lines_where_i_call = [line for line in input_lines if call_regex_compiled.search(line)]
237
            self._file.close()
2034
78
238
            result = True
2000
79
            if not ignore_function_pointers and not function_pointer_id:
2001
80
                function_pointer_id = [1]
2002
81
2003
82
            for line in lines_where_i_call:
2004
83
                # we'll catch call and callq for the moment
2005
84
                called = (call_regex_compiled.search(line).group(0))[8:].lstrip(' ').rstrip('\n')
2006
85
                if called[0] == '*' and ignore_function_pointers == False:
2007
86
                    # we have a function pointer, which we'll want to give a distinct name
2008
87
                    address = '0'
2009
88
                    name = '_._function_pointer_' + str(function_pointer_id[0])
2010
89
                    function_pointer_id[0] += 1
2011
90
2012
91
                    self.what_do_i_call.append((address, name))
2013
92
2014
93
                else: # we're not on a function pointer
2015
94
                    called_split = called.split(' ')
2016
95
                    if len(called_split) == 2:
2017
96
                        address, name = called_split
2018
97
                        name = name.strip('<>')
2019
98
                        # we still want to remove address offsets like +0x09 from the end of name
2020
99
                        match = re.match(r'^.+(?=\+0x[a-f0-9]+$)', name)
2021
100
                        if match is not None:
2022
101
                            name = match.group(0)
2023
102
                        self.what_do_i_call.append((address, name.strip('<>')))
2024
103
                    else:  # the format of the "what do i call" is not recognised as a name/address pair
2025
104
                        self.what_do_i_call.append(tuple(called_split))
2026
105
2027
106
    def __str__(self):
2028
107
        if self.position:
2029
108
            return 'Memory address ' + self.position + ' with name ' + self.name + ' in section ' + str(
2030
109
                self.assembler_section)
2035
110
        else:
239
        else:
2155
111
            return 'Name ' + self.name
240
            result = False
2156
112
241
2157
113
    def __repr__(self):
242
        return result
2158
114
        out_str = 'Disassembly of section ' + self.assembler_section + ':\n\n' + self.position + ' ' + self.name + ':\n'
243
2159
115
        return out_str + '\n'.join([' ' + line for line in self.original_code])
244
    def _open_file(self, path):
2160
116
245
        """
2161
117
246
        Open and return the file at path.
2162
118
class Parser:
247
2163
119
    # Class to manipulate the output of objdump
248
        Raises:
2164
120
249
            FileNotFoundError:
2165
121
    def __init__(self, input_file_location='', file_contents=None, sections_to_view=None, ignore_function_pointers=False):
250
                If the file fails to open.
2166
122
        """Creates a new Parser, given an input file path. That path should be an output from objdump -D.
251
2167
123
        Alternatively, supply file_contents, as a list of each line of the objdump output. We expect newlines
252
        Arguments:
2168
124
         to have been stripped from the end of each of these lines.
253
            path:
2169
125
         sections_to_view makes sure we only use the specified sections (use [] for 'all sections' and None for none).
254
                The path of the file to open.
2170
126
        """
255
        """
2052
127
        if file_contents is None:
2053
128
            file_contents = []
2054
129
2055
130
        if sections_to_view is None:
2056
131
            sections_to_view = []
2057
132
2058
133
        if input_file_location:
2059
134
            file_to_read = open(input_file_location, 'r')
2060
135
            self.source_string_list = [line for line in file_to_read]
2061
136
            file_to_read.close()
2062
137
        elif file_contents:
2063
138
            self.source_string_list = [string + '\n' for string in file_contents]
2064
139
        self.parsed_objects = []
2065
140
        self.sections_to_view = sections_to_view
2066
141
        self.ignore_function_pointers = ignore_function_pointers
2067
142
        self.pointer_identifier = [1]
2068
143
2069
144
    def create_objects(self):
2070
145
        """ Go through the source_string_list, getting object names (like 'main') along with the corresponding
2071
146
         definitions, and put them into parsed_objects """
2072
147
        if self.sections_to_view is None:
2073
148
            return
2074
149
2075
150
        is_in_section = lambda name: self.sections_to_view == [] or name in self.sections_to_view
2076
151
2077
152
        parsed_objects = []
2078
153
        current_object = []
2079
154
        current_section = ''
2080
155
        regex_compiled_addr_and_name = re.compile(r'[0-9a-f]+ <.+>:\n')
2081
156
        regex_compiled_section = re.compile(r'section .+:\n')
2082
157
2083
158
        for line in self.source_string_list[4:]:  # we bodge, since the file starts with a little bit of guff
2084
159
            if regex_compiled_addr_and_name.match(line):
2085
160
                # we are a starting line
2086
161
                current_object = [line]
2087
162
            elif re.match(r'Disassembly of section', line):
2088
163
                current_section = regex_compiled_section.search(line).group(0).lstrip('section ').rstrip(':\n')
2089
164
                current_object = []
2090
165
            elif line == '\n':
2091
166
                # we now need to stop parsing the current block, and store it
2092
167
                if len(current_object) > 0 and is_in_section(current_section):
2093
168
                    parsed_objects.append(ParsedObject(input_lines=current_object, assembler_section=current_section,
2094
169
                                                       ignore_function_pointers=self.ignore_function_pointers,
2095
170
                                                       function_pointer_id=self.pointer_identifier))
2096
171
            else:
2097
172
                current_object.append(line)
2098
173
2099
174
        # now we should be done. We assumed that blocks begin with r'[0-9a-f]+ <.+>:\n' and end with a newline.
2100
175
        # clear duplicates:
2101
176
2102
177
        self.parsed_objects = []
2103
178
        for obj in parsed_objects:
2104
179
            if obj not in self.parsed_objects: # this is so that if we jump into the function at an offset,
2105
180
                # we still register it as being the old function, not some new function at a different address
2106
181
                # with the same name
2107
182
                self.parsed_objects.append(obj)
2108
183
2109
184
                # by this point, each object contains a self.what_do_i_call which is a list of tuples
2110
185
                #  ('address', 'name') if the address and name were recognised, or else (thing1, thing2, ...)
2111
186
                # where the instruction was call thing1 thing2 thing3... .
2112
187
2113
188
    def object_lookup(self, object_name='', object_address=''):
2114
189
        """Returns the object with name object_name or address object_address (at least one must be given).
2115
190
        If objects with the given name or address
2116
191
        are not found, returns None."""
2117
192
2118
193
        if object_name == '' and object_address == '':
2119
194
            return None
2120
195
2121
196
        trial_obj = self.parsed_objects
2122
197
2123
198
        if object_name != '':
2124
199
            trial_obj = [obj for obj in trial_obj if obj.name == object_name]
2125
200
2126
201
        if object_address != '':
2127
202
            trial_obj = [obj for obj in trial_obj if
2128
203
                         obj.canonical_position == ParsedObject.get_canonical_position(object_address)]
2129
204
2130
205
        if len(trial_obj) == 0:
2131
206
            return None
2132
207
2133
208
        return trial_obj
2134
209
2135
210
def get_parsed_objects(filepath, sections_to_view, not_object_file, readable=False, ignore_function_pointers=False):
2136
211
    if sections_to_view is None:
2137
212
        sections_to_view = []  # because we use None for "no sections"; the intent of not providing any sections
2138
213
        # on the command line was to look at all sections, not none
2139
214
2140
215
    # first, check whether the given file exists
2141
216
    if not os.path.isfile(filepath):
2142
217
        # we'd like to use FileNotFoundError, but we might be running under
2143
218
        # Python 2, which doesn't have it.
2144
219
        raise IOError(filepath + 'is not found.')
2145
220
2146
221
    #now the file should exist
2147
222
    if not not_object_file:  #if it is something we need to run through objdump first
2148
223
        #we need first to run the object file through objdump
2149
224
2150
225
        objdump_file_contents = subprocess.check_output(['objdump', '-D', filepath])
2151
226
        objdump_str = objdump_file_contents.decode('utf-8')
2152
227
2153
228
        p = Parser(file_contents=objdump_str.split('\n'), sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)
2154
229
    else:
2171
230
        try:
256
        try:
2183
231
            p = Parser(input_file_location=filepath, sections_to_view=sections_to_view, ignore_function_pointers=ignore_function_pointers)
257
            result = open(path)
2184
232
        except UnicodeDecodeError:
258
        except Exception as e:
2185
233
            logging.error('File could not be parsed as a string. Did you mean to supply --object-file?')
259
            raise FileNotFoundError("parser failed to open `{}`: {}".format(path, e.strerror))
2186
234
            return False
260
2187
235
261
        return result
2188
236
    if readable: # if we're being called from the command line
262
2189
237
        print('File read; beginning parse.')
263
2190
238
    #file is now read, and we start parsing
264
def run_objdump(input_file):
2191
239
265
    """
2192
240
    p.create_objects()
266
    Run the objdump command on the file with the given path.
2193
241
    return p.parsed_objects
267
2194
268
    Return the input file path and a file object representing the result of
2195
269
    the objdump.
2196
270
2197
271
    Arguments:
2198
272
        input_file:
2199
273
            The path of the file to run objdump on.
2200
274
        
2201
275
    """
2202
276
    # A single section can be specified for parsing with the -j flag,
2203
277
    # but it is not obviously possible to parse multiple sections like this.
2204
278
    p = subprocess.Popen(['objdump', '-d', input_file, '--no-show-raw-insn'], 
2205
279
                         stdout=subprocess.PIPE)
2206
280
    g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$'], stdin=p.stdout, stdout=subprocess.PIPE)
2207
281
    return input_file, g.stdout
2208
282
2209
242
283
2210
243
def main():
284
def main():
2221
244
    argumentparser = argparse.ArgumentParser(description="Parse the output of objdump.")
285
    """
2222
245
    argumentparser.add_argument('--filepath', metavar="FILEPATH", help="path to input file", type=str, nargs=1)
286
    Run the parser from the command line.
2223
246
    argumentparser.add_argument('--not-object-file', help="import text objdump output instead of the compiled file", default=False,
287
2224
247
                                action='store_true')
288
    The path of the target file, the sections to view and the ignore function
2225
248
    argumentparser.add_argument('--sections-to-view', metavar="SECTIONS",
289
    pointers flag are set with command line arguments.
2226
249
                                help="sections of disassembly to view, like '.text'; leave blank for 'all'",
290
    """
2227
250
                                type=str, nargs='*')
291
    ap = argparse.ArgumentParser(description="Parse the output of objdump.")
2228
251
    argumentparser.add_argument('--ignore-function-pointers', help='whether to skip parsing calls to function pointers', action='store_true', default=False)
292
    ap.add_argument('--filepath', metavar="FILEPATH", 
2229
252
293
                    help="path to input file", type=str, nargs=1)
2230
253
    parsed = argumentparser.parse_args()
294
2231
295
    ap.add_argument('--sections-to-view', metavar="SECTIONS",
2232
296
                    help="disassembly sections to view, eg '.text'; leave blank for 'all'",
2233
297
                    type=str, nargs='*')
2234
298
    ap.add_argument('--ignore-function-pointers', 
2235
299
                    help='skip parsing calls to function pointers', 
2236
300
                    action='store_true', default=False)
2237
301
2238
302
    args = ap.parse_args()
2239
254
    
303
    
2258
255
    filepath = parsed.filepath[0]
304
    filepath = args.filepath[0]
2259
256
    sections_to_view = parsed.sections_to_view
305
    sections = args.sections_to_view
2260
257
    not_object_file = parsed.not_object_file
306
    ignore_ptrs = args.ignore_function_pointers
2261
258
    readable = True
307
2262
259
    function_pointers = parsed.ignore_function_pointers
308
    parser = Parser(filepath, sections, ignore_ptrs)
2263
260
309
    parser.parse()
2264
261
    parsed_objs = get_parsed_objects(filepath, sections_to_view, not_object_file, readable, function_pointers)
310
2265
262
    if parsed_objs is False:
311
2266
263
        return 1
312
if __name__ == '__main__':
2249
264
2250
265
    if readable:
2251
266
        for named_function in parsed_objs:
2252
267
            print(named_function.name)
2253
268
            print([f[-1] for f in named_function.what_do_i_call])  # use [-1] to get the last element, since:
2254
269
        #either we are in ('address', 'name'), when we want the last element, or else we are in (thing1, thing2, ...)
2255
270
        #so for the sake of argument we'll take the last thing
2256
271
2257
272
if __name__ == "__main__":
2267
273
    main()
313
    main()
2268
274
314
2269
=== modified file 'src/sextant/query.py'
2270
--- src/sextant/query.py	2014-08-26 16:33:20 +0000
2271
+++ src/sextant/query.py	2014-10-23 12:33:12 +0000
2272
@@ -14,7 +14,7 @@
2273
14
from .export import ProgramConverter
14
from .export import ProgramConverter
2274
15
15
2275
16
16
2277
17
def query(remote_neo4j, input_query, display_neo4j='', program_name=None,
17
def query(connection, display_neo4j='', program_name=None,
2278
18
          argument_1=None, argument_2=None, suppress_common=False):
18
          argument_1=None, argument_2=None, suppress_common=False):
2279
19
    """
19
    """
2280
20
    Run a query against the database at remote_neo4j.
20
    Run a query against the database at remote_neo4j.
2281
@@ -36,24 +36,24 @@
2282
36
36
2283
37
    """
37
    """
2284
38
38
2289
39
    if display_neo4j:
39
    # if display_neo4j:
2290
40
        display_url = display_neo4j
40
    #     display_url = display_neo4j
2291
41
    else:
41
    # else:
2292
42
        display_url = remote_neo4j
42
    #     display_url = remote_neo4j
2293
43
43
2307
44
    try:
44
    # try:
2308
45
        db = db_api.SextantConnection(remote_neo4j)
45
    #     db = db_api.SextantConnection(remote_neo4j)
2309
46
    except requests.exceptions.ConnectionError as err:
46
    # except requests.exceptions.ConnectionError as err:
2310
47
        logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))
47
    #     logging.error("Could not connect to Neo4J server {}. Are you sure it is running?".format(display_url))
2311
48
        logging.error(str(err))
48
    #     logging.error(str(err))
2312
49
        return 2
49
    #     return 2
2313
50
    #Not supported in python 2
50
    # #Not supported in python 2
2314
51
    #except (urllib.exceptions.MaxRetryError):
51
    # #except (urllib.exceptions.MaxRetryError):
2315
52
     #   logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))
52
    #  #   logging.error("Connection was refused to {}. Are you sure the server is running?".format(remote_neo4j))
2316
53
      #  return 2
53
    #   #  return 2
2317
54
    except Exception as err:
54
    # except Exception as err:
2318
55
        logging.exception(str(err))
55
    #     logging.exception(str(err))
2319
56
        return 2
56
    #     return 2
2320
57
57
2321
58
    prog = None
58
    prog = None
2322
59
    names_list = None
59
    names_list = None
2323
@@ -66,38 +66,38 @@
2324
66
        if argument_1 is None:
66
        if argument_1 is None:
2325
67
            print('Supply one function name to functions-calling.')
67
            print('Supply one function name to functions-calling.')
2326
68
            return 1
68
            return 1
2328
69
        prog = db.get_all_functions_calling(program_name, argument_1)
69
        prog = connection.get_all_functions_calling(program_name, argument_1)
2329
70
    elif input_query == 'functions-called-by':
70
    elif input_query == 'functions-called-by':
2330
71
        if argument_1 is None:
71
        if argument_1 is None:
2331
72
            print('Supply one function name to functions-called-by.')
72
            print('Supply one function name to functions-called-by.')
2332
73
            return 1
73
            return 1
2334
74
        prog = db.get_all_functions_called(program_name, argument_1)
74
        prog = connection.get_all_functions_called(program_name, argument_1)
2335
75
    elif input_query == 'all-call-paths':
75
    elif input_query == 'all-call-paths':
2336
76
        if argument_1 is None and argument_2 is None:
76
        if argument_1 is None and argument_2 is None:
2337
77
            print('Supply two function names to calls-between.')
77
            print('Supply two function names to calls-between.')
2338
78
            return 1
78
            return 1
2340
79
        prog = db.get_call_paths(program_name, argument_1, argument_2)
79
        prog = connection.get_call_paths(program_name, argument_1, argument_2)
2341
80
    elif input_query == 'whole-program':
80
    elif input_query == 'whole-program':
2343
81
        prog = db.get_whole_program(program_name)
81
        prog = connection.get_whole_program(program_name)
2344
82
    elif input_query == 'shortest-call-path':
82
    elif input_query == 'shortest-call-path':
2345
83
        if argument_1 is None and argument_2 is None:
83
        if argument_1 is None and argument_2 is None:
2346
84
            print('Supply two function names to shortest-path.')
84
            print('Supply two function names to shortest-path.')
2347
85
            return 1
85
            return 1
2349
86
        prog = db.get_shortest_path_between_functions(program_name, argument_1, argument_2)
86
        prog = connection.get_shortest_path_between_functions(program_name, argument_1, argument_2)
2350
87
    elif input_query == 'functions':
87
    elif input_query == 'functions':
2351
88
        if program_name is not None:
88
        if program_name is not None:
2353
89
            func_names = db.get_function_names(program_name)
89
            func_names = connection.get_function_names(program_name)
2354
90
            if func_names:
90
            if func_names:
2355
91
                names_list = list(func_names)
91
                names_list = list(func_names)
2356
92
            else:
92
            else:
2357
93
                print('No functions were found in program %s on server %s.' % (program_name, display_url))
93
                print('No functions were found in program %s on server %s.' % (program_name, display_url))
2358
94
        else:
94
        else:
2360
95
            list_of_programs = db.get_program_names()
95
            list_of_programs = connection.get_program_names()
2361
96
            if not list_of_programs:
96
            if not list_of_programs:
2362
97
                print('Server %s database empty.' % (display_url))
97
                print('Server %s database empty.' % (display_url))
2363
98
                return 0
98
                return 0
2364
99
99
2366
100
            func_list = [db.get_function_names(prog_name)
100
            func_list = [connection.get_function_names(prog_name)
2367
101
                         for prog_name in list_of_programs]
101
                         for prog_name in list_of_programs]
2368
102
102
2369
103
            if not func_list:
103
            if not func_list:
2370
@@ -105,7 +105,7 @@
2371
105
            else:
105
            else:
2372
106
                names_list = func_list
106
                names_list = func_list
2373
107
    elif input_query == 'programs':
107
    elif input_query == 'programs':
2375
108
        list_found = list(db.get_program_names())
108
        list_found = list(connection.get_program_names())
2376
109
        if not list_found:
109
        if not list_found:
2377
110
            print('No programs were found on server {}.'.format(display_url))
110
            print('No programs were found on server {}.'.format(display_url))
2378
111
        else:
111
        else:
2379
@@ -122,7 +122,5 @@
2380
122
        print('Nothing was returned from the query.')
122
        print('Nothing was returned from the query.')
2381
123
123
2382
124
124
2387
125
def audit(remote_neo4j):
125
def audit(connection):
2388
126
    db = db_api.SextantConnection(remote_neo4j)
126
    return connection.programs_with_metadata()
2385
127
2386
128
    return db.programs_with_metadata()
2389
129
127
2390
=== added file 'src/sextant/sshmanager.py'
2391
--- src/sextant/sshmanager.py	1970-01-01 00:00:00 +0000
2392
+++ src/sextant/sshmanager.py	2014-10-23 12:33:12 +0000
2393
@@ -0,0 +1,278 @@
2394
1
import os
2395
2
import getpass
2396
3
import logging
2397
4
import subprocess
2398
5
2399
6
"""Provide a class to manage an SSH tunnel and controller"""
2400
7
__all__ = ('SSHConnectionError', 'SSHCommandError', 'SSHManager')
2401
8
2402
9
# The location of the temporary directory to create on the REMOTE machine.
2403
10
# Temporary files will be scp'd here prior to upload to the neo4j database.
2404
11
TMP_DIR = '/tmp/sextant'
2405
12
2406
13
2407
14
class SSHConnectionError(Exception):
2408
15
    """
2409
16
    An exception raised when an attempt to establish an ssh connection fails.
2410
17
    """
2411
18
    pass
2412
19
2413
20
2414
21
class SSHCommandError(Exception):
2415
22
    """
2416
23
    An exception raised when an attempt to run a command over ssh fails.
2417
24
    """
2418
25
    pass
2419
26
2420
27
2421
28
class SSHManager(object):
    """
    Manage an ssh tunnel with port forwarding.

    The tunnel is started as a multiplexing master (ssh -M) so that later
    commands and file copies can reuse it through its control socket.

    Attributes:
        local_port:
            The port number on the local machine to forward.
        remote_host:
            The host to ssh into.
        remote_port:
            The port number on the remote host to connect to.
        ssh_user:
            The username to use for sshing - defaults to None, in which case
            the ssh connection uses the username of the user who ran sextant.

        _tmp_dir:
            The per-user temporary directory on the remote machine, built
            from TMP_DIR and the ssh (or local) user name.
        _controller_name:
            The identifying name for the ssh control socket - a fixed base
            combined with the local port.
        _is_localhost:
            True if we are trying to ssh into localhost. In this case do not
            open the tunnel, just provide the right api so the rest of Sextant
            need not special case.
    """

    def __init__(self, local_port, remote_host, remote_port,
                 ssh_user=None, is_localhost=False):
        """
        Open an SSH tunnel with multiplexing enabled.

        Raises:
            ValueError:
                If local_port or remote_port are not positive integers
            SSHConnectionError:
                If the ssh command failed to run (see _open).

        Arguments:
            local_port:
                The number of the local port to forward.
            remote_host:
                The name of the remote host to connect to.
            remote_port:
                The port number on the remote host to connect to.
            ssh_user:
                An alternative user name to use for the ssh login.
            is_localhost:
                True if we are trying to ssh into localhost.
        """
        if not (isinstance(local_port, int) and local_port > 0):
            raise ValueError(
                'Local port {} must be a positive integer.'.format(local_port))
        if not (isinstance(remote_port, int) and remote_port > 0):
            raise ValueError(
                'Remote port {} must be a positive integer.'.format(remote_port))

        self.local_port = local_port
        self.remote_host = remote_host
        self.remote_port = remote_port
        self.ssh_user = ssh_user

        self._tmp_dir = '{}-{}'.format(TMP_DIR, self.ssh_user or getpass.getuser())

        self._controller_name = 'sextantcontroller{}'.format(local_port)
        self._is_localhost = is_localhost

        self._open()

    def _open(self):
        """
        Helper function to open the SSH tunnel.

        Does nothing when targeting localhost. Otherwise starts the master
        ssh process and creates the remote temporary directory.

        Raises:
            SSHConnectionError:
                If the ssh command failed to run.
        """
        if self._is_localhost:
            return

        # The command is built as an argument list, so no shell is involved.
        cmd = ['ssh']

        if self.ssh_user:
            # ssh -l {user} ... sets the remote login username
            cmd.extend(['-l', self.ssh_user])

        # -L localport:localhost:remoteport forwards the port.
        port_fwd = '{}:localhost:{}'.format(self.local_port, self.remote_port)

        # -M makes SSH able to accept slave connections.
        # -S sets the location of a control socket (in this case,
        #    sextantcontroller with the local port appended, in case sextant
        #    is run twice simultaneously), so we know how to close the
        #    tunnel again.
        # -f goes into background; -N does not execute a remote command;
        # -T says to remote host that we don't want a text shell.
        cmd.extend(['-M', '-S', self._controller_name, '-fNT',
                    '-L', port_fwd, self.remote_host])

        logging.debug('Opening SSH tunnel with cmd: {}'.format(' '.join(cmd)))

        rc = subprocess.call(cmd)
        if rc:
            raise SSHConnectionError('SSH setup failed with error {}'.format(rc))

        logging.debug('SSH tunnel created')

        self._make_tmp_dir()

    def close(self):
        """
        Close the SSH tunnel after cleaning the temp directory.
        """
        if self._is_localhost:
            return

        # Clean the temporary directory we created earlier. This must happen
        # BEFORE the master exits: _call talks through the control socket,
        # which no longer exists once the exit request has been handled.
        self._delete_tmp_dir()

        # ssh -O sends a command to the master specified in -S, -q for quiet.
        cmd = ['ssh', '-S', self._controller_name,
               '-O', 'exit', '-q', self.remote_host]

        logging.debug('Shutting down SSH tunnel with cmd: `{}`'
                      .format(' '.join(cmd)))

        # SSH has a bug on some systems which causes it to ignore the -q flag
        # meaning it prints "Exit request sent." to stderr.
        # To avoid this, we grab stderr temporarily, and see if it's that string;
        # if it is, suppress it.
        # universal_newlines=True yields text rather than bytes, so the
        # comparison below also works on Python 3.
        pr = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, universal_newlines=True)
        _, stderr = pr.communicate()
        message = stderr.strip()
        # Only report genuine output: an empty stderr is the normal case.
        if message and message != 'Exit request sent.':
            logging.error('SSH shutdown stderr: {}'.format(message))

        if pr.returncode == 0:
            logging.debug('Shut down successfully')
        else:
            logging.error('SSH shutdown failed with code {}'
                          .format(pr.returncode))

    def _call(self, *args):
        """
        Execute a command on the remote machine over SSH.

        Return a tuple of rc, stdout, stderr from the process call. When
        targeting localhost nothing is run and the tuple is
        (1, None, <explanatory message>).

        Arguments:
            *args:
                Strings containing the individual words of the command to
                execute. E.g. _call('ls', '-lh', '.').
        """
        if self._is_localhost:
            return (1, None, 'Cannot call SSH command from localhost')

        ssh_cmd = ['ssh', '-S', self._controller_name, self.remote_host]
        ssh_cmd.extend(args)
        p = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()

        # Failures are only logged here; callers decide what to do with rc.
        if p.returncode:
            logging.debug('Call to `{}` failed with code: {}, stderr: {}'
                          .format(' '.join(ssh_cmd), p.returncode, stderr))

        return p.returncode, stdout, stderr

    def _make_tmp_dir(self):
        """
        Create the per-user temporary directory on the remote machine.
        """
        self._call('mkdir', '-p', self._tmp_dir)

    def _delete_tmp_dir(self):
        """
        Remove the temporary directory on the remote machine.
        """
        self._call('rm', '-r', self._tmp_dir)

    def send_to_tmp_dir(self, path_list):
        """
        Send the specified files to the temporary directory on the remote machine.

        Return an iterable of save paths on the remote machine. When
        targeting localhost nothing is copied and the given paths are
        returned (as a list).

        Raises:
            ValueError:
                If no file paths were provided, or if one or more of the
                provided paths is not an actual file.
            SSHCommandError:
                If the scp command failed for any reason.

        Arguments:
            path_list:
                Iterable of paths to the files on the local machine. All files
                will be checked before copying to ensure that they exist and
                to prevent passing arbitrary arguments to the scp command.
        """
        # Materialise first: a generator is always truthy, so testing it
        # directly would let an empty iterator through. This also lets us
        # iterate over the paths more than once.
        path_list = list(path_list)
        if not path_list:
            raise ValueError('attempt to copy zero files')

        # If we are in localhost, we are not controlling the TMP_DIR,
        # so the files are already there.
        if self._is_localhost:
            return path_list

        # Check that actual files are being copied - not random strings.
        missed = [f for f in path_list if not os.path.isfile(f)]
        if missed:
            raise ValueError('Attempted to copy non existant files: {}'
                             .format(', '.join(missed)))

        # Reuse the master connection through its control socket so the copy
        # runs as the same remote user as the tunnel (ssh_user is only given
        # to the master) and needs no second authentication.
        scp_cmd = ['scp', '-o', 'ControlPath={}'.format(self._controller_name)]
        scp_cmd.extend(path_list)
        scp_cmd.append('{}:{}'.format(self.remote_host, self._tmp_dir))

        # communicate() drains both pipes (wait() could block on a full pipe)
        # and gives us the stderr text needed for the error message.
        proc = subprocess.Popen(scp_cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
        _, stderr = proc.communicate()
        rc = proc.returncode
        if rc:
            raise SSHCommandError('scp failed with code {}: {}'.format(rc, stderr))

        return (os.path.join(self._tmp_dir, os.path.basename(f)) for f in path_list)

    def remove_from_tmp_dir(self, path_list):
        """
        Delete the files specified as arguments from the remote machine.

        The output of send_to_tmp_dir may be passed as input to this function.

        A failing rm is not raised to the caller; _call logs it at debug
        level. Nothing is done for localhost or for an empty path_list.

        Arguments:
            path_list:
                Iterable of paths of the files on the remote machine. Only
                the base name of each path is used, so
                remove_from_tmp_dir(['foo']) will delete the file
                self._tmp_dir/foo
        """
        if self._is_localhost:
            return

        # Assume we can trust this file list.
        paths = [os.path.join(self._tmp_dir, os.path.basename(f)) for f in path_list]
        if not paths:
            # rm with no operands is an error, so skip the remote call.
            return
        self._call('rm', *paths)
2667
274
2668
275
            
2669
276
2670
277
2671
278
2672
0
279
2673
=== added file 'src/sextant/test_all.sh'
2674
--- src/sextant/test_all.sh	1970-01-01 00:00:00 +0000
2675
+++ src/sextant/test_all.sh	2014-10-23 12:33:12 +0000
2676
@@ -0,0 +1,4 @@
2677
1
#!/usr/bin/env bash
# Run every unit test module (test_*.py) discoverable from the current directory.

# Export the variable so the python child process actually sees the extended
# path; a plain assignment is only inherited if PYTHONPATH was already exported.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${HOME}/."
# Quote the pattern so the shell never expands the glob itself.
python -m unittest discover --pattern='test_*.py'
2681
0
5
2682
=== added file 'src/sextant/test_csvwriter.py'
2683
--- src/sextant/test_csvwriter.py	1970-01-01 00:00:00 +0000
2684
+++ src/sextant/test_csvwriter.py	2014-10-23 12:33:12 +0000
2685
@@ -0,0 +1,89 @@
2686
1
#!/usr/bin/python
2687
2
import unittest
2688
3
from csvwriter import CSVWriter
2689
4
import subprocess
2690
5
from os import listdir
2691
6
2692
7
class TestSequence(unittest.TestCase):
    """Tests for CSVWriter: header output, row validation and file splitting."""

    def get_writer(self, path='tmp_test', headers=None, split=100):
        """
        Build a CSVWriter for a test.

        headers defaults to ['name', 'type']; a fresh list is created on each
        call so that tests never share one mutable default.
        """
        if headers is None:
            headers = ['name', 'type']
        return CSVWriter(path, headers, split)

    def tearDown(self):
        """Remove the tmp_test*.csv files left in the current directory."""
        to_rm = [f for f in listdir('.') if f.startswith('tmp_test') and f.endswith('.csv')]
        if to_rm:
            rc = subprocess.call(['rm'] + to_rm)
            msg = 'failed to clean' if rc else 'cleaned'
            print('{} {} files {}'.format(msg, len(to_rm), to_rm))

    def test_headers(self):
        # check that headers are being written correctly
        headers = ['some', 'headers', 'to', 'check']
        writer = self.get_writer(headers=headers)
        writer.finish()

        expected_path = 'tmp_test0.csv'
        # next() works on both Python 2 and 3, unlike the .next() method.
        self.assertEqual(next(writer.file_iter()), expected_path)

        # The with-block closes the file even when an assertion fails.
        with open(expected_path, 'r') as writer_file:
            self.assertEqual(writer_file.readline(), 'some,headers,to,check\n')
            self.assertFalse(writer_file.readline()) # check that nothing extra is written

    def test_writing(self):
        # check that csv entries are written correctly, and errors
        # appropriately raised for invalid input
        writer = self.get_writer()

        self.assertRaises(ValueError, writer.write, 'too short')
        self.assertRaises(ValueError, writer.write, 'slightly', 'too', 'long')
        writer.write('just', 'write')

        writer.finish()

        with open(next(writer.file_iter()), 'r') as writer_file:
            self.assertEqual(writer_file.readline(), 'name,type\n')
            self.assertEqual(writer_file.readline(), 'just,write\n')
            self.assertFalse(writer_file.readline())

    def test_split(self):
        # check that output is split into the expected number of files, each
        # starting with the header line and holding `split` lines in total
        split = 10
        files = 10
        writer = self.get_writer(split=split)

        for _ in range(files*(split-1)): # split-1 to account for header line
            writer.write('an', 'entry')

        writer.finish()

        gen_count = sum(1 for f in writer.file_iter())
        self.assertEqual(gen_count, files,
                         'generated {} files, expected {}'
                         .format(gen_count, files))

        header_expected = 'name,type\n'
        for f in writer.file_iter():
            with open(f, 'r') as wf:
                header_line = wf.readline()
                self.assertEqual(header_line, header_expected,
                                 '{} contained header {}, expected {}'
                                 .format(f, header_line, header_expected)) # check headers

            # check line count
            with open(f, 'r') as wf:
                line_count = sum(1 for line in wf)
                self.assertEqual(line_count, split,
                                 '{} contained {} lines, expected {}'
                                 .format(f, line_count, split))
2769
84
2770
85
2771
86
if __name__ == '__main__':
2772
87
    unittest.main()
2773
88
2774
89
2775
0
90
2776
=== renamed file 'src/sextant/tests.py' => 'src/sextant/test_db_api.py' (properties changed: -x to +x)
2777
--- src/sextant/tests.py	2014-08-14 15:23:39 +0000
2778
+++ src/sextant/test_db_api.py	2014-10-23 12:33:12 +0000
2779
@@ -1,3 +1,4 @@
2780
1
#!/usr/bin/python
2781
1
# -----------------------------------------
2
# -----------------------------------------
2782
2
# Sextant
3
# Sextant
2783
3
# Copyright 2014, Ensoft Ltd.
4
# Copyright 2014, Ensoft Ltd.
2784
@@ -10,56 +11,69 @@
2785
10
from db_api import Function
11
from db_api import Function
2786
11
from db_api import FunctionQueryResult
12
from db_api import FunctionQueryResult
2787
12
from db_api import SextantConnection
13
from db_api import SextantConnection
2789
13
from db_api import Validator
14
from db_api import validate_query
2790
14
15
2791
15
16
2792
16
class TestFunctionQueryResults(unittest.TestCase):
17
class TestFunctionQueryResults(unittest.TestCase):
2794
17
    def setUp(self):
18
    @classmethod
2795
19
    def setUpClass(cls):
2796
18
        # we need to set up the remote database by using the neo4j_input_api
20
        # we need to set up the remote database by using the neo4j_input_api
2828
19
        self.remote_url = 'http://ensoft-sandbox:7474'
21
        cls.remote_url = 'http://ensoft-sandbox:7474'
2829
20
22
2830
21
        self.setter_connection = SextantConnection(self.remote_url)
23
        cls.setter_connection = SextantConnection('ensoft-sandbox', 7474)
2831
22
        self.program_1_name = 'testprogram'
24
2832
23
        self.upload_program = self.setter_connection.new_program(self.program_1_name)
25
        cls.program_1_name = 'testprogram'
2833
24
        self.upload_program.add_function('func1')
26
        cls.one_node_program_name = 'testprogram1'
2834
25
        self.upload_program.add_function('func2')
27
        cls.empty_program_name = 'testprogramblank'
2835
26
        self.upload_program.add_function('func3')
28
2836
27
        self.upload_program.add_function('func4')
29
        # if anything failed before, delete programs now
2837
28
        self.upload_program.add_function('func5')
30
        cls.setter_connection.delete_program(cls.program_1_name)
2838
29
        self.upload_program.add_function('func6')
31
        cls.setter_connection.delete_program(cls.one_node_program_name)
2839
30
        self.upload_program.add_function('func7')
32
        cls.setter_connection.delete_program(cls.empty_program_name)
2840
31
        self.upload_program.add_function_call('func1', 'func2')
33
2841
32
        self.upload_program.add_function_call('func1', 'func4')
34
2842
33
        self.upload_program.add_function_call('func2', 'func1')
35
        cls.upload_program = cls.setter_connection.new_program(cls.program_1_name)
2843
34
        self.upload_program.add_function_call('func2', 'func4')
36
        cls.upload_program.add_function('func1')
2844
35
        self.upload_program.add_function_call('func3', 'func5')
37
        cls.upload_program.add_function('func2')
2845
36
        self.upload_program.add_function_call('func4', 'func4')
38
        cls.upload_program.add_function('func3')
2846
37
        self.upload_program.add_function_call('func4', 'func5')
39
        cls.upload_program.add_function('func4')
2847
38
        self.upload_program.add_function_call('func5', 'func1')
40
        cls.upload_program.add_function('func5')
2848
39
        self.upload_program.add_function_call('func5', 'func2')
41
        cls.upload_program.add_function('func6')
2849
40
        self.upload_program.add_function_call('func5', 'func3')
42
        cls.upload_program.add_function('func7')
2850
41
        self.upload_program.add_function_call('func6', 'func7')
43
        cls.upload_program.add_call('func1', 'func2')
2851
42
44
        cls.upload_program.add_call('func1', 'func4')
2852
43
        self.upload_program.commit()
45
        cls.upload_program.add_call('func2', 'func1')
2853
44
46
        cls.upload_program.add_call('func2', 'func4')
2854
45
        self.one_node_program_name = 'testprogram1'
47
        cls.upload_program.add_call('func3', 'func5')
2855
46
        self.upload_one_node_program = self.setter_connection.new_program(self.one_node_program_name)
48
        cls.upload_program.add_call('func4', 'func4')
2856
47
        self.upload_one_node_program.add_function('lonefunc')
49
        cls.upload_program.add_call('func4', 'func5')
2857
48
50
        cls.upload_program.add_call('func5', 'func1')
2858
49
        self.upload_one_node_program.commit()
51
        cls.upload_program.add_call('func5', 'func2')
2859
52
        cls.upload_program.add_call('func5', 'func3')
2860
53
        cls.upload_program.add_call('func6', 'func7')
2861
54
2862
55
        cls.upload_program.commit()
2863
56
2864
57
        cls.upload_one_node_program = cls.setter_connection.new_program(cls.one_node_program_name)
2865
58
        cls.upload_one_node_program.add_function('lonefunc')
2866
59
2867
60
        cls.upload_one_node_program.commit()
2868
50
        
61
        
2881
51
        self.empty_program_name = 'testprogramblank'
62
        cls.upload_empty_program = cls.setter_connection.new_program(cls.empty_program_name)
2882
52
        self.upload_empty_program = self.setter_connection.new_program(self.empty_program_name)
63
2883
53
64
        cls.upload_empty_program.commit()
2884
54
        self.upload_empty_program.commit()
65
2885
55
66
        cls.getter_connection = cls.setter_connection
2886
56
        self.getter_connection = SextantConnection(self.remote_url)
67
2887
57
68
2888
58
    def tearDown(self):
69
    @classmethod
2889
59
        self.setter_connection.delete_program(self.upload_program.program_name)
70
    def tearDownClass(cls):
2890
60
        self.setter_connection.delete_program(self.upload_one_node_program.program_name)
71
        cls.setter_connection.delete_program(cls.upload_program.program_name)
2891
61
        self.setter_connection.delete_program(self.upload_empty_program.program_name)
72
        cls.setter_connection.delete_program(cls.upload_one_node_program.program_name)
2892
62
        del(self.setter_connection)
73
        cls.setter_connection.delete_program(cls.upload_empty_program.program_name)
2893
74
2894
75
        cls.setter_connection.close()
2895
76
        del(cls.setter_connection)
2896
63
77
2897
64
    def test_17_get_call_paths(self):
78
    def test_17_get_call_paths(self):
2898
65
        reference1 = FunctionQueryResult(parent_db=None, program_name=self.program_1_name)
79
        reference1 = FunctionQueryResult(parent_db=None, program_name=self.program_1_name)
2899
@@ -134,7 +148,7 @@
2900
134
148
2901
135
    def test_08_get_program_names(self):
149
    def test_08_get_program_names(self):
2902
136
        reference = {self.program_1_name, self.one_node_program_name, self.empty_program_name}
150
        reference = {self.program_1_name, self.one_node_program_name, self.empty_program_name}
2904
137
        self.assertEqual(reference, self.getter_connection.get_program_names())
151
        self.assertTrue(reference.issubset(self.getter_connection.get_program_names()))
2905
138
152
2906
139
153
2907
140
    def test_11_get_all_functions_called(self):
154
    def test_11_get_all_functions_called(self):
2908
@@ -249,13 +263,13 @@
2909
249
        self.assertIsNone(self.getter_connection.get_call_paths(self.one_node_program_name, 'notafunc', 'notafunc'))
263
        self.assertIsNone(self.getter_connection.get_call_paths(self.one_node_program_name, 'notafunc', 'notafunc'))
2910
250
264
2911
251
    def test_10_validator(self):
265
    def test_10_validator(self):
2919
252
        self.assertFalse(Validator.validate(''))
266
        self.assertFalse(validate_query(''))
2920
253
        self.assertTrue(Validator.validate('thisworks'))
267
        self.assertTrue(validate_query('thisworks'))
2921
254
        self.assertTrue(Validator.validate('th1sw0rks'))
268
        self.assertTrue(validate_query('th1sw0rks'))
2922
255
        self.assertTrue(Validator.validate('12345'))
269
        self.assertTrue(validate_query('12345'))
2923
256
        self.assertFalse(Validator.validate('this does not work'))
270
        self.assertFalse(validate_query('this does not work'))
2924
257
        self.assertTrue(Validator.validate('this_does_work'))
271
        self.assertTrue(validate_query('this_does_work'))
2925
258
        self.assertFalse(Validator.validate("'")) # string consisting of a single quote mark
272
        self.assertFalse(validate_query("'")) # string consisting of a single quote mark
2926
259
273
2927
260
if __name__ == '__main__':
274
if __name__ == '__main__':
2928
261
    unittest.main()
2929
262
\ No newline at end of file
275
\ No newline at end of file
2930
276
    unittest.main()
2931
263
277
2932
=== added file 'src/sextant/test_parser.py'
2933
--- src/sextant/test_parser.py	1970-01-01 00:00:00 +0000
2934
+++ src/sextant/test_parser.py	2014-10-23 12:33:12 +0000
2935
@@ -0,0 +1,85 @@
2936
1
#!/usr/bin/python
2937
2
from collections import defaultdict
2938
3
import unittest
2939
4
import subprocess
2940
5
2941
6
import objdump_parser as parser
2942
7
2943
8
DUMP_FILE = 'test_resources/parser_test.dump'
2944
9
2945
10
class TestSequence(unittest.TestCase):
    """Tests for the objdump parser, run against the DUMP_FILE fixture."""

    def add_function(self, dct, name, typ):
        """Record a parsed function in dct, failing on a duplicate name."""
        self.assertNotIn(name, dct, "duplicate function added: {} into {}".format(name, dct.keys()))
        dct[name] = typ

    def add_call(self, dct, caller, callee):
        """Record a parsed call; dct maps each caller to a list of callees."""
        dct[caller].append(callee)

    def do_parse(self, path=DUMP_FILE, sections=None, ignore_ptrs=False):
        """
        Parse path and return (parse result, functions dict, calls dict).

        sections defaults to ['.text']; a fresh list is built on each call so
        that the default is never shared between tests.
        """
        if sections is None:
            sections = ['.text']

        functions = {}
        calls = defaultdict(list)

        # set the Parser to put output in local dictionaries
        add_function = lambda n, t: self.add_function(functions, n, t)
        add_call = lambda a, b: self.add_call(calls, a, b)

        p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs,
                          add_function=add_function, add_call=add_call)
        res = p.parse()

        # NOTE(review): these assignments set attributes on the parser
        # *module*, not on the Parser instance - presumably a reset of
        # module-level callbacks; confirm they are still needed.
        parser.add_function = None
        parser.add_call = None

        return res, functions, calls

    def test_open(self):
        self.assertRaises(parser.FileNotFoundError, parser.Parser, file_path='rubbish file')

    def test_functions(self):
        # ensure that the correct functions are listed with the correct types
        res, funcs, calls = self.do_parse()

        expected_types = [('normal', 'normal'),
                          ('duplicates', 'normal'),
                          ('wierd$name', 'normal'),
                          ('printf', 'stub'),
                          ('func_ptr_3', 'pointer')]
        for name, typ in expected_types:
            self.assertIn(name, funcs, "'{}' not found in function dictionary".format(name))
            self.assertEqual(funcs[name], typ)

        self.assertNotIn('__gmon_start__', funcs, "don't see a function defined in .plt")

    def test_no_ptrs(self):
        # ensure that the ignore_ptrs flags is working
        res, funcs, calls = self.do_parse(ignore_ptrs=True)

        self.assertNotIn('pointer', funcs.values())
        self.assertEqual(len(calls['normal']), 2)

    def test_calls(self):
        res, funcs, calls = self.do_parse()

        self.assertIn('normal', calls['main'])
        self.assertIn('duplicates', calls['main'])

        normal_calls = sorted(['wierd$name', 'printf', 'func_ptr_3'])
        self.assertEqual(sorted(calls['normal']), normal_calls)

        self.assertEqual(calls['duplicates'].count('normal'), 2)
        self.assertEqual(calls['duplicates'].count('printf'), 2,
                         "expected 2 printf calls in {}".format(calls['duplicates']))
        self.assertIn('func_ptr_4', calls['duplicates'])
        self.assertIn('func_ptr_5', calls['duplicates'])

    def test_sections(self):
        res, funcs, calls = self.do_parse(sections=['.plt', '.text'])

        # check that we have got rid of the @s in the names
        self.assertNotIn('@', ''.join(funcs.keys()), "check names are extracted correctly")
        self.assertIn('__gmon_start__', funcs, "see a function defined only in .plt")
3017
82
3018
83
    
3019
84
if __name__ == '__main__':
3020
85
    unittest.main()
3021
0
86
3022
=== added directory 'src/sextant/test_resources'
3023
=== added file 'src/sextant/test_resources/parser_test'
3024
1
Binary files src/sextant/test_resources/parser_test	1970-01-01 00:00:00 +0000 and src/sextant/test_resources/parser_test	2014-10-23 12:33:12 +0000 differ
87
Binary files src/sextant/test_resources/parser_test	1970-01-01 00:00:00 +0000 and src/sextant/test_resources/parser_test	2014-10-23 12:33:12 +0000 differ
3025
=== added file 'src/sextant/test_resources/parser_test.c'
3026
--- src/sextant/test_resources/parser_test.c	1970-01-01 00:00:00 +0000
3027
+++ src/sextant/test_resources/parser_test.c	2014-10-23 12:33:12 +0000
3028
@@ -0,0 +1,57 @@
3029
1
// COMMENT
3030
2
#include<stdio.h>
3031
3
3032
4
static int
3033
5
normal(int a);
3034
6
3035
7
static int
3036
8
wierd$name(int a);
3037
9
3038
10
typedef int (*pointer)(int);
3039
11
3040
12
static int
3041
13
normal(int a)
3042
14
{
3043
15
    /* call a normal func,
3044
16
     * a stub and a pointer
3045
17
     */
3046
18
    pointer ptr = wierd$name;
3047
19
3048
20
    wierd$name(a);
3049
21
    printf("%d\n", a);
3050
22
    ptr(a);
3051
23
3052
24
    return (a);
3053
25
}
3054
26
3055
27
static int
3056
28
wierd$name(int a)
3057
29
{
3058
30
    return (a);
3059
31
}
3060
32
3061
33
static int
3062
34
duplicates(int a)
3063
35
{
3064
36
    pointer ptr1 = wierd$name;
3065
37
3066
38
    /* check stubs don't get duplicated */
3067
39
    printf("first %d\n", a);
3068
40
    printf("second %d\n", a);
3069
41
3070
42
    normal(a);
3071
43
    normal(a);
3072
44
3073
45
    ptr1(a);
3074
46
    ptr1(a);
3075
47
3076
48
    return (a);
3077
49
}
3078
50
3079
51
int
3080
52
main(void) 
3081
53
{
3082
54
    normal(1);
3083
55
    duplicates(1);
3084
56
    return (0);
3085
57
}
3086
0
58
3087
=== added file 'src/sextant/test_resources/parser_test.dump'
3088
--- src/sextant/test_resources/parser_test.dump	1970-01-01 00:00:00 +0000
3089
+++ src/sextant/test_resources/parser_test.dump	2014-10-23 12:33:12 +0000
3090
@@ -0,0 +1,44 @@
3091
1
Disassembly of section .init:
3092
2
080482b4 <_init>:
3093
3
 80482b8:	call   8048350 <__x86.get_pc_thunk.bx>
3094
4
 80482cd:	call   8048300 <__gmon_start__@plt>
3095
5
Disassembly of section .plt:
3096
6
080482e0 <printf@plt-0x10>:
3097
7
080482f0 <printf@plt>:
3098
8
08048300 <__gmon_start__@plt>:
3099
9
08048310 <__libc_start_main@plt>:
3100
10
Disassembly of section .text:
3101
11
08048320 <_start>:
3102
12
 804833c:	call   8048310 <__libc_start_main@plt>
3103
13
08048350 <__x86.get_pc_thunk.bx>:
3104
14
08048360 <deregister_tm_clones>:
3105
15
 8048386:	call   *%eax
3106
16
08048390 <register_tm_clones>:
3107
17
 80483c3:	call   *%edx
3108
18
080483d0 <__do_global_dtors_aux>:
3109
19
 80483df:	call   8048360 <deregister_tm_clones>
3110
20
080483f0 <frame_dummy>:
3111
21
 804840f:	call   *%eax
3112
22
0804841d <normal>:
3113
23
 8048430:	call   8048458 <wierd$name>
3114
24
 8048443:	call   80482f0 <printf@plt>
3115
25
 8048451:	call   *%eax
3116
26
08048458 <wierd$name>:
3117
27
08048460 <duplicates>:
3118
28
 804847b:	call   80482f0 <printf@plt>
3119
29
 804848e:	call   80482f0 <printf@plt>
3120
30
 8048499:	call   804841d <normal>
3121
31
 80484a4:	call   804841d <normal>
3122
32
 80484b2:	call   *%eax
3123
33
 80484bd:	call   *%eax
3124
34
080484c4 <main>:
3125
35
 80484d4:	call   804841d <normal>
3126
36
 80484e0:	call   8048460 <duplicates>
3127
37
080484f0 <__libc_csu_init>:
3128
38
 80484f6:	call   8048350 <__x86.get_pc_thunk.bx>
3129
39
 804850e:	call   80482b4 <_init>
3130
40
 804853b:	call   *-0xf8(%ebx,%edi,4)
3131
41
08048560 <__libc_csu_fini>:
3132
42
Disassembly of section .fini:
3133
43
08048564 <_fini>:
3134
44
 8048568:	call   8048350 <__x86.get_pc_thunk.bx>
3135
0
45
3136
=== added file 'src/sextant/test_sshmanager.py'
3137
--- src/sextant/test_sshmanager.py	1970-01-01 00:00:00 +0000
3138
+++ src/sextant/test_sshmanager.py	2014-10-23 12:33:12 +0000
3139
@@ -0,0 +1,72 @@
3140
1
#!/usr/bin/python3
3141
2
import unittest
3142
3
import sshmanager
3143
4
import sshmanager
3144
5
import os
3145
6
sshmanager.TMP_DIR = '/home/benhutc/obj/csvload/src/sextant/test_resources/tmp'
3146
7
3147
8
3148
9
class TestSequence(unittest.TestCase):
3149
10
    def setUp(self):
3150
11
        self.manager = None
3151
12
3152
13
    def tearDown(self):
3153
14
        if self.manager:
3154
15
            self.manager.close()
3155
16
            self.manager = None
3156
17
3157
18
    def get_manager(self, local_port=9643, remote_host='localhost', 
3158
19
                    remote_port=9643, ssh_user=None):
3159
20
        return sshmanager.SSHManager(local_port, remote_host, remote_port, ssh_user)
3160
21
3161
22
    def test_init(self):
3162
23
        self.assertRaises(ValueError, self.get_manager, local_port='invalid port')
3163
24
        self.assertRaises(ValueError, self.get_manager, remote_port='invalid port')
3164
25
3165
26
    def test_connect(self):
3166
27
        # make a connection to localhost and ensure that tmp is created
3167
28
        self.manager = self.get_manager()
3168
29
        self.assertTrue(os.path.isdir(self.manager._tmp_dir))
3169
30
        self.manager.close()
3170
31
        self.assertFalse(os.path.isdir(self.manager._tmp_dir))
3171
32
        self.manager = None
3172
33
3173
34
        # check connection failure
3174
35
        self.assertRaises(sshmanager.SSHConnectionError, self.get_manager, remote_host='invalid host')
3175
36
3176
37
    def test_files(self):
3177
38
        genuine_file = 'test_resources/parser_test.c'
3178
39
        genuine_file2 = 'test_resources/parser_test'
3179
40
        absent_file = 'absent_file'
3180
41
3181
42
        self.manager = self.get_manager()
3182
43
        # check sending no files fails
3183
44
        self.assertRaises(ValueError, self.manager.send_to_tmp_dir, [])
3184
45
        # and sending a non-existent file
3185
46
        self.assertRaises(ValueError, self.manager.send_to_tmp_dir, [absent_file, genuine_file])
3186
47
        
3187
48
        self.manager.send_to_tmp_dir([genuine_file, genuine_file2])
3188
49
        self.assertTrue(os.path.isfile(os.path.join(self.manager._tmp_dir, genuine_file.split('/')[-1])))
3189
50
        self.assertTrue(os.path.isfile(os.path.join(self.manager._tmp_dir, genuine_file2.split('/')[-1])))
3190
51
3191
52
        self.manager.remove_from_tmp_dir([genuine_file, genuine_file2])
3192
53
        self.assertFalse(os.path.isfile(os.path.join(self.manager._tmp_dir, 
3193
54
                                               genuine_file.split('/')[-1])))
3194
55
        self.assertFalse(os.path.isfile(os.path.join(self.manager._tmp_dir,
3195
56
                                               genuine_file2.split('/')[-1])))
3196
57
3197
58
3198
59
        self.manager.close()
3199
60
        self.manager = None
3200
61
3201
62
3202
63
if __name__ == '__main__':
3203
64
    # no coverage for:
3204
65
    #  specifying ssh user
3205
66
    #  scp failure
3206
67
    #  an error in closing the ssh connection
3207
68
    #  another error in closing the ssh connection
3208
69
    #  mkdir failure
3209
70
    #  rmdir failure
3210
71
    unittest.main()
3211
72
3212
0
73
3213
=== modified file 'src/sextant/update_db.py'
3214
--- src/sextant/update_db.py	2014-09-29 14:01:39 +0000
3215
+++ src/sextant/update_db.py	2014-10-23 12:33:12 +0000
3216
@@ -5,72 +5,106 @@
3217
5
# -----------------------------------------
5
# -----------------------------------------
3218
6
# Given a program file to upload, or a program name to delete from the server, does the right thing.
6
# Given a program file to upload, or a program name to delete from the server, does the right thing.
3219
7
7
3220
8
from __future__ import print_function
3221
9
3222
8
__all__ = ("upload_program", "delete_program")
10
__all__ = ("upload_program", "delete_program")
3223
9
11
3226
10
from .db_api import SextantConnection, Validator
12
from .db_api import SextantConnection
3227
11
from .objdump_parser import get_parsed_objects
13
from .sshmanager import SSHConnectionError
3228
14
from .objdump_parser import Parser, run_objdump
3229
12
from os import path
15
from os import path
3230
16
from time import time
3231
17
import subprocess
3232
18
import sys
3233
13
19
3234
14
import logging
20
import logging
3235
15
21
3295
16
22
def upload_program(connection, user_name, file_path, program_name=None, 
3296
17
def upload_program(user_name, file_path, db_url, display_url='',
23
                   not_object_file=False):
3297
18
                   alternative_name=None, not_object_file=False):
24
    """
3298
19
    """
25
    Upload a program's functions and call graph to the database.
3299
20
    Uploads a program to the remote database.
26
3300
21
27
    Arguments:
3301
22
    Raises requests.exceptions.ConnectionError if the server didn't exist.
28
        connection:
3302
23
    Raises IOError if file_path doesn't correspond to a file.
29
            The SextantConnection object that manages the database connection.
3303
24
    Raises ValueError if the desired alternative_name (or the default, if no
30
        user_name:
3304
25
    alternative_name was specified) already exists in the database.
31
            The user name of the user uploading the program.
3305
26
    :param file_path: the path to the local file we wish to upload
32
        file_path:
3306
27
    :param db_url: the URL of the database (eg. http://localhost:7474)
33
            The path to either: the output of objdump (if not_object_file is 
3307
28
    :param display_url: alternative URL to display instead of db_url
34
            True) OR to a binary file (if not_object_file is False).
3308
29
    :param alternative_name: a name to give the program to override the default
35
        program_name:
3309
30
    :param object_file: bool(the file is an objdump text output file, rather than a compiled binary)
36
            An optional name to give the program in the database, if not
3310
31
37
            specified then <user_name>-<file name> will be used.
3311
32
    """
38
        not_object_file:
3312
33
39
            Flag controlling whether file_path is pointing to a dump file or
3313
34
    if not display_url:
40
            a binary file.
3314
35
        display_url = db_url
41
    """
3315
36
42
    if not connection._ssh:
3316
37
    # if no name is specified, use the form "<username>-<binary name>"
43
        raise SSHConnectionError('An SSH connection is required for '
3317
38
    name = alternative_name or (user_name + '-' + path.split(file_path)[-1])
44
                                 'program upload.')
3318
39
45
3319
40
    connection = SextantConnection(db_url)
46
    if not program_name:
3320
41
47
        file_no_ext = path.basename(file_path).split('.')[0]
3321
42
    program_names = connection.get_program_names()
48
        program_name = '{}-{}'.format(user_name, file_no_ext)
3322
43
    if Validator.sanitise(name) in program_names:
49
3323
44
        raise ValueError("There is already a program with name {}; "
50
3324
45
                         "please delete the previous one with the same name "
51
    if program_name in connection.get_program_names():
3325
46
                         "and retry, or rename the input file.".format(name))
52
        raise ValueError('A program with name `{}` already exists in the database'
3326
47
53
                         .format(program_name))
3327
48
    parsed_objects = get_parsed_objects(filepath=file_path,
54
3328
49
                                        sections_to_view=['.text'],
55
3329
50
                                        not_object_file=not_object_file,
56
    print('Uploading `{}` to the database. '
3330
51
                                        ignore_function_pointers=False)
57
          'This may take some time for larger programs.'
3331
52
58
          .format(program_name))
3332
53
    logging.info('Objdump has parsed!')
59
    start = time()
3333
54
60
3334
55
    program_representation = connection.new_program(Validator.sanitise(name))
61
    if not not_object_file:
3335
56
62
        print('Generating dump file...', end='')
3336
57
    for obj in parsed_objects:
63
        sys.stdout.flush()
3337
58
        for called in obj.what_do_i_call:
64
        file_path, file_object = run_objdump(file_path)
3338
59
            if not program_representation.add_function_call(obj.name, called[-1]): # called is a tuple (address, name)
65
        print('done.')
3339
60
                logging.error('Validation error: {} calling {}'.format(obj.name, called[-1]))
66
    else:
3340
61
67
        file_object = None
3341
62
    logging.info('Sending {} named objects to server {}...'.format(len(parsed_objects), display_url))
68
3342
63
    program_representation.commit()
69
    # Make parser and wire to DBprogram.
3343
64
    logging.info('Successfully added {}.'.format(name))
70
    with connection.new_program(program_name) as program:
3344
65
71
3345
66
72
        def start_parser(program):
3346
67
def delete_program(program_name, db_url):
73
            print('Parsing dump file...', end='')
3347
68
    """
74
            sys.stdout.flush()
3348
69
    Deletes a program with the specified name from the database.
75
3349
70
    :param program_name: the name of the program to delete
76
        def finish_parser(parser, program):
3350
71
    :param db_url: the URL of the database (eg. http://localhost:7474)
77
            # Callback to make sure the program's csv files are flushed when
3351
72
    :return: bool(success)
78
            # the parser completes.
3352
73
    """
79
            program.func_writer.finish()
3353
74
    connection = SextantConnection(db_url)
80
            program.call_writer.finish()
3354
81
3355
82
            print('done: {} functions and {} calls.'
3356
83
                  .format(parser.function_count, parser.call_count))
3357
84
3358
85
        parser = Parser(file_path = file_path, file_object = file_object,
3359
86
                        sections=[],
3360
87
                        add_function = program.add_function,
3361
88
                        add_call = program.add_call,
3362
89
                        started=lambda parser: start_parser(program),
3363
90
                        finished=lambda parser: finish_parser(parser, program))
3364
91
        parser.parse()
3365
92
3366
93
        program.commit()
3367
94
3368
95
    end = time()
3369
96
    print('Finished in {:.2f}s.'.format(end-start))
3370
97
3371
98
3372
99
def delete_program(connection, program_name):
3373
100
    """
3374
101
    Remove the specified program from the database.
3375
102
3376
103
    Arguments:
3377
104
        connection:
3378
105
            The SextantConnection object managing the database connection.
3379
106
        program_name:
3380
107
            The name of the program to remove from the database.
3381
108
    """
3382
75
    connection.delete_program(program_name)
109
    connection.delete_program(program_name)
3384
76
    print('Successfully deleted {}.'.format(program_name))
110
3385
77
111
3386
=== modified file 'src/sextant/web/server.py'
3387
--- src/sextant/web/server.py	2014-10-03 11:47:52 +0000
3388
+++ src/sextant/web/server.py	2014-10-23 12:33:12 +0000
3389
@@ -26,7 +26,8 @@
3390
26
26
3391
27
from cgi import escape  # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 2
27
from cgi import escape  # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 2
3392
28
28
3394
29
database_url = None  # the URL to access the database instance
29
# global SextantConnection object which deals with the port forwarding
3395
30
CONNECTION = None
3396
30
31
3397
31
RESPONSE_CODE_OK = 200
32
RESPONSE_CODE_OK = 200
3398
32
RESPONSE_CODE_BAD_REQUEST = 400
33
RESPONSE_CODE_BAD_REQUEST = 400
3399
@@ -67,25 +68,6 @@
3400
67
68
3401
68
class SVGRenderer(Resource):
69
class SVGRenderer(Resource):
3402
69
70
3403
70
    def error_creating_neo4j_connection(self, failure):
3404
71
        self.write("Error creating Neo4J connection: %s\n") % failure.getErrorMessage()
3405
72
3406
73
    @staticmethod
3407
74
    def create_neo4j_connection():
3408
75
        return db_api.SextantConnection(database_url)
3409
76
3410
77
    @staticmethod
3411
78
    def check_program_exists(connection, name):
3412
79
        return connection.check_program_exists(name)
3413
80
3414
81
    @staticmethod
3415
82
    def get_whole_program(connection, name):
3416
83
        return connection.get_whole_program(name)
3417
84
3418
85
    @staticmethod
3419
86
    def get_functions_calling(connection, progname, funcname):
3420
87
        return connection.get_all_functions_calling(progname, funcname)
3421
88
3422
89
    @staticmethod
71
    @staticmethod
3423
90
    def get_plot(program, suppress_common_functions=False, remove_self_calls=False):
72
    def get_plot(program, suppress_common_functions=False, remove_self_calls=False):
3424
91
        graph_dot = export.ProgramConverter.to_dot(program, suppress_common_functions,
73
        graph_dot = export.ProgramConverter.to_dot(program, suppress_common_functions,
3425
@@ -111,7 +93,7 @@
3426
111
        res_msg = None # set this in the logic
93
        res_msg = None # set this in the logic
3427
112
94
3428
113
        #
95
        #
3430
114
        # Get program name and database connection, check if program exists
96
        # Check if provided program name exists
3431
115
        #
97
        #
3432
116
98
3433
117
        name = args.get('program_name', [None])[0]
99
        name = args.get('program_name', [None])[0]
3434
@@ -121,16 +103,7 @@
3435
121
            res_msg = "Supply 'program_name' parameter."
103
            res_msg = "Supply 'program_name' parameter."
3436
122
104
3437
123
        if res_code is RESPONSE_CODE_OK:
105
        if res_code is RESPONSE_CODE_OK:
3448
124
            try:
106
            exists = yield deferToThread(CONNECTION.check_program_exists, name)
3439
125
                conn = yield deferToThread(self.create_neo4j_connection)
3440
126
            except requests.exceptions.ConnectionError:
3441
127
                res_code = RESPONSE_CODE_BAD_GATEWAY 
3442
128
                res_fmt = "Could not reach Neo4j server at {}"
3443
129
                res_msg = res_fmt.format(database_url)
3444
130
                conn = None
3445
131
3446
132
        if res_code is RESPONSE_CODE_OK:
3447
133
            exists = yield deferToThread(self.check_program_exists, conn, name)
3449
134
            if not exists:
107
            if not exists:
3450
135
                res_code = RESPONSE_CODE_NOT_FOUND
108
                res_code = RESPONSE_CODE_NOT_FOUND
3451
136
                res_fmt = "Program {} not found in database."
109
                res_fmt = "Program {} not found in database."
3452
@@ -146,28 +119,23 @@
3453
146
        # look for in request.args, both tuples
119
        # look for in request.args, both tuples
3454
147
        queries = {
120
        queries = {
3455
148
            'whole_program': (
121
            'whole_program': (
3458
149
                self.get_whole_program,
122
                CONNECTION.get_whole_program,
3457
150
                (conn, name), 
3459
151
                ()
123
                ()
3460
152
            ),
124
            ),
3461
153
            'functions_calling': (
125
            'functions_calling': (
3464
154
                self.get_functions_calling,
126
                CONNECTION.get_all_functions_calling,
3463
155
                (conn, name), 
3465
156
                ('func1',)
127
                ('func1',)
3466
157
            ),
128
            ),
3467
158
            'functions_called_by': (
129
            'functions_called_by': (
3470
159
                conn.get_all_functions_called,
130
                CONNECTION.get_all_functions_called,
3469
160
                (name,), 
3471
161
                ('func1',)
131
                ('func1',)
3472
162
            ),
132
            ),
3473
163
            'all_call_paths': (
133
            'all_call_paths': (
3476
164
                conn.get_call_paths,
134
                CONNECTION.get_call_paths,
3475
165
                (name,),
3477
166
                ('func1', 'func2')
135
                ('func1', 'func2')
3478
167
            ),
136
            ),
3479
168
            'shortest_call_path': (
137
            'shortest_call_path': (
3482
169
                conn.get_shortest_path_between_functions,
138
                CONNECTION.get_shortest_path_between_functions,
3481
170
                (name,),
3483
171
                ('func1', 'func2')
139
                ('func1', 'func2')
3484
172
            )
140
            )
3485
173
        }
141
        }
3486
@@ -186,7 +154,7 @@
3487
186
154
3488
187
        # extract any required keyword arguments from request.args
155
        # extract any required keyword arguments from request.args
3489
188
        if res_code is RESPONSE_CODE_OK:
156
        if res_code is RESPONSE_CODE_OK:
3491
189
            fn, known_args, kwargs = query
157
            fn, kwargs = query
3492
190
            
158
            
3493
191
            # all args will be strings - use None to indicate missing argument
159
            # all args will be strings - use None to indicate missing argument
3494
192
            req_args = tuple(args.get(kwarg, [None])[0] for kwarg in kwargs)
160
            req_args = tuple(args.get(kwarg, [None])[0] for kwarg in kwargs)
3495
@@ -202,9 +170,8 @@
3496
202
        # if we are okay here we have a valid query with all required arguments
170
        # if we are okay here we have a valid query with all required arguments
3497
203
        if res_code is RESPONSE_CODE_OK:
171
        if res_code is RESPONSE_CODE_OK:
3498
204
            try:
172
            try:
3499
205
                all_args = known_args + req_args
3500
206
                program = yield defer_to_thread_with_timeout(render_timeout, fn,
173
                program = yield defer_to_thread_with_timeout(render_timeout, fn,
3502
207
                                                             *all_args)
174
                                                             name, *req_args)
3503
208
            except defer.CancelledError:
175
            except defer.CancelledError:
3504
209
                # the timeout has fired and cancelled the request
176
                # the timeout has fired and cancelled the request
3505
210
                res_code = RESPONSE_CODE_BAD_REQUEST
177
                res_code = RESPONSE_CODE_BAD_REQUEST
3506
@@ -247,16 +214,12 @@
3507
247
class GraphProperties(Resource):
214
class GraphProperties(Resource):
3508
248
215
3509
249
    @staticmethod
216
    @staticmethod
3520
250
    def _get_connection():
217
    def _get_program_names():
3521
251
        return db_api.SextantConnection(database_url)
218
        return CONNECTION.get_program_names()
3522
252
219
3523
253
    @staticmethod
220
    @staticmethod
3524
254
    def _get_program_names(connection):
221
    def _get_function_names(program_name):
3525
255
        return connection.get_program_names()
222
        return CONNECTION.get_function_names(program_name)
3516
256
3517
257
    @staticmethod
3518
258
    def _get_function_names(connection, program_name):
3519
259
        return connection.get_function_names(program_name)
3526
260
223
3527
261
    @defer.inlineCallbacks
224
    @defer.inlineCallbacks
3528
262
    def _render_GET(self, request):
225
    def _render_GET(self, request):
3529
@@ -269,18 +232,9 @@
3530
269
232
3531
270
        query = request.args['query'][0]
233
        query = request.args['query'][0]
3532
271
234
3533
272
        try:
3534
273
            neo4j_connection = yield deferToThread(self._get_connection)
3535
274
        except Exception:
3536
275
            request.setResponseCode(502)  # Bad Gateway
3537
276
            request.write("Could not reach Neo4j server at {}.".format(database_url))
3538
277
            request.finish()
3539
278
            defer.returnValue(None)
3540
279
            neo4j_connection = None  # just to silence the "referenced before assignment" warnings
3541
280
3542
281
        if query == 'programs':
235
        if query == 'programs':
3543
282
            request.setHeader("content-type", "application/json")
236
            request.setHeader("content-type", "application/json")
3545
283
            prognames = yield deferToThread(self._get_program_names, neo4j_connection)
237
            prognames = yield deferToThread(self._get_program_names)
3546
284
            request.write(json.dumps(list(prognames)))
238
            request.write(json.dumps(list(prognames)))
3547
285
            request.finish()
239
            request.finish()
3548
286
            defer.returnValue(None)
240
            defer.returnValue(None)
3549
@@ -294,7 +248,7 @@
3550
294
                defer.returnValue(None)
248
                defer.returnValue(None)
3551
295
            program_name = request.args['program_name'][0]
249
            program_name = request.args['program_name'][0]
3552
296
250
3554
297
            funcnames = yield deferToThread(self._get_function_names, neo4j_connection, program_name)
251
            funcnames = yield deferToThread(self._get_function_names, program_name)
3555
298
            if funcnames is None:
252
            if funcnames is None:
3556
299
                request.setResponseCode(404)
253
                request.setResponseCode(404)
3557
300
                request.setHeader("content-type", "text/plain")
254
                request.setHeader("content-type", "text/plain")
3558
@@ -319,10 +273,12 @@
3559
319
        return NOT_DONE_YET
273
        return NOT_DONE_YET
3560
320
274
3561
321
275
3566
322
def serve_site(input_database_url='http://localhost:7474', port=2905):
276
def serve_site(connection, port):
3567
323
277
    global CONNECTION
3568
324
    global database_url
278
3569
325
    database_url = input_database_url
279
    CONNECTION = connection
3570
280
3571
281
3572
326
    # serve static directory at root
282
    # serve static directory at root
3573
327
    root = File(os.path.join(environment.RESOURCES_DIR, 'sextant', 'web'))
283
    root = File(os.path.join(environment.RESOURCES_DIR, 'sextant', 'web'))
3574
328
284
Reviewer	Review Type	Date Requested	Status
Robert		2014-10-23	Approve on 2014-10-23
Review via email: mp+239356@code.launchpad.net