Merge lp:~ensoft-opensource/ensoft-sextant/perf into lp:ensoft-sextant

Proposed by Patrick Stevens
Status: Merged
Approved by: Phil Connell
Approved revision: 19
Merged at revision: 21
Proposed branch: lp:~ensoft-opensource/ensoft-sextant/perf
Merge into: lp:ensoft-sextant
Diff against target: 118 lines (+52/-21)
1 file modified
src/sextant/db_api.py (+52/-21)
To merge this branch: bzr merge lp:~ensoft-opensource/ensoft-sextant/perf
Reviewer Review Type Date Requested Status
Phil Connell Approve
Review via email: mp+233077@code.launchpad.net

Commit message

Speed up program upload by batching queries differently

Upload now happens in two passes:
 - Function nodes are added.
 - Links between nodes are added.

Description of the change

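Speeds up program upload (vim uploads three times faster). This is achieved by implementing the relevant blueprint: function nodes are uploaded in one transaction, and the calls between them are then uploaded in a second transaction that refers to the nodes by the ids returned from the first.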

To post a comment you must log in.
18. By Patrick Stevens <email address hidden>

Remove old code

19. By Patrick Stevens <email address hidden>

Add a comment

Revision history for this message
Phil Connell (pconnell) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/sextant/db_api.py'
2--- src/sextant/db_api.py 2014-08-27 09:07:39 +0000
3+++ src/sextant/db_api.py 2014-09-03 14:12:18 +0000
4@@ -67,7 +67,8 @@
5 self.program_name = program_name
6 self.parent_database_connection = sextant_connection
7 self._functions = {}
8- self._new_tx = None
9+ self._funcs_tx = None # transaction for uploading functions
10+ self._calls_tx = None # transaction for uploading relationships
11
12 if self.parent_database_connection:
13 # we'll locally use db for short
14@@ -80,10 +81,37 @@
15 date=date)
16 self._parent_id = parent_function.id
17
18- self._new_tx = db.transaction(using_globals=False, for_query=True)
19+ self._funcs_tx = db.transaction(using_globals=False, for_query=True)
20+ self._calls_tx = db.transaction(using_globals=False, for_query=True)
21
22 self._connections = []
23
24+ @staticmethod
25+ def _get_display_name(function_name):
26+ """
27+ Gets the name we will display to the user for this function name.
28+
29+ For instance, if function_name were __libc_start_main@plt, we would
30+ return ("__libc_start_main", "plt_stub"). The returned function type is
31+ currently one of "plt_stub", "function_pointer" or "normal".
32+
33+ :param function_name: the name straight from objdump of a function
34+ :return: ("display name", "function type")
35+
36+ """
37+
38+ if function_name[-4:] == "@plt":
39+ display_name = function_name[:-4]
40+ function_group = "plt_stub"
41+ elif function_name[:20] == "_._function_pointer_":
42+ display_name = function_name
43+ function_group = "function_pointer"
44+ else:
45+ display_name = function_name
46+ function_group = "normal"
47+
48+ return display_name, function_group
49+
50 def add_function(self, function_name):
51 """
52 Adds a function to the program, ready to be sent to the remote database.
53@@ -98,21 +126,14 @@
54 if self.class_contains_function(function_name):
55 return True
56
57- if function_name[-4:] == "@plt":
58- display_name = function_name[:-4]
59- function_group = "plt_stub"
60- elif function_name[:20] == "_._function_pointer_":
61- display_name = function_name
62- function_group = "function_pointer"
63- else:
64- display_name = function_name
65- function_group = "normal"
66+ display_name, function_group = self._get_display_name(function_name)
67
68 query = ('START n = node({}) '
69- 'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}})')
70+ 'CREATE (n)-[:subject]->(m:func {{type: "{}", name: "{}"}}) '
71+ 'RETURN m.name, id(m)')
72 query = query.format(self._parent_id, function_group, display_name)
73
74- self._new_tx.append(query)
75+ self._funcs_tx.append(query)
76
77 self._functions[function_name] = function_name
78
79@@ -159,13 +180,6 @@
80 self.add_function(fn_calling)
81
82 if not self.class_contains_call(fn_calling, fn_called):
83- query = ('START p = node({}) '
84- 'MATCH (p)-[:subject]->(n) WHERE n.name = "{}" '
85- 'MATCH (p)-[:subject]->(m) WHERE m.name = "{}" '
86- 'CREATE (n)-[:calls]->(m)')
87- query = query.format(self._parent_id, fn_calling, fn_called)
88- self._new_tx.append(query)
89-
90 self._connections.append((fn_calling, fn_called))
91
92 return True
93@@ -175,7 +189,24 @@
94 Call this when you are finished with the object.
95 Changes are not synced to the remote database until this is called.
96 """
97- self._new_tx.commit()
98+ functions = self._funcs_tx.commit() # send off the function names
99+
100+ # now functions is a list of QuerySequence objects, which each have a
101+ # .elements property which produces [['name', id]]
102+
103+ id_funcs = dict([seq.elements[0] for seq in functions])
104+ logging.info('Functions uploaded. Uploading calls...')
105+
106+ # so id_funcs is a dict with id_funcs['name'] == id
107+ for call in self._connections:
108+ query = ('MATCH n WHERE id(n) = {} '
109+ 'MATCH m WHERE id(m) = {} '
110+ 'CREATE (n)-[:calls]->(m)')
111+ query = query.format(id_funcs[self._get_display_name(call[0])[0]],
112+ id_funcs[self._get_display_name(call[1])[0]])
113+ self._calls_tx.append(query)
114+
115+ self._calls_tx.commit()
116
117
118 class FunctionQueryResult:

Subscribers

People subscribed via source and target branches