Merge lp:~ben-hutchings/ensoft-sextant/file-names into lp:ensoft-sextant

Proposed by Ben Hutchings
Status: Merged
Merged at revision: 32
Proposed branch: lp:~ben-hutchings/ensoft-sextant/file-names
Merge into: lp:ensoft-sextant
Diff against target: 170 lines (+35/-18)
4 files modified
src/sextant/db_api.py (+5/-4)
src/sextant/objdump_parser.py (+26/-10)
src/sextant/test_parser.py (+1/-1)
src/sextant/update_db.py (+3/-3)
To merge this branch: bzr merge lp:~ben-hutchings/ensoft-sextant/file-names
Reviewer Review Type Date Requested Status
Robert Pending
Review via email: mp+240731@code.launchpad.net

Commit message

Added 'file' attribute to functions in the database, modified the objdump call code and the objdump parser to read files from the disassembly.

Description of the change

Functions now have a 'file' attribute in the database, which stores the path of their source file. This information is not yet used anywhere else in Sextant.

To post a comment you must log in.
32. By Ben Hutchings

fixed test_parser to run cleanly

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'src/sextant/db_api.py'
2--- src/sextant/db_api.py 2014-10-23 11:15:48 +0000
3+++ src/sextant/db_api.py 2014-11-05 16:09:28 +0000
4@@ -159,7 +159,7 @@
5 tmp_path = os.path.join(self._tmp_dir, '{}_{{}}'.format(program_name))
6
7 self.func_writer = CSVWriter(tmp_path.format('funcs'),
8- headers=['name', 'type'],
9+ headers=['name', 'type', 'file'],
10 max_rows=5000)
11 self.call_writer = CSVWriter(tmp_path.format('calls'),
12 headers=['caller', 'callee'],
13@@ -171,7 +171,7 @@
14 ' WITH line, toInt(line.id) as lineid'
15 ' MATCH (n:program {{name: "{}"}})'
16 ' CREATE (n)-[:subject]->(m:func {{name: line.name,'
17- ' id: lineid, type: line.type}})')
18+ ' id: lineid, type: line.type, file: line.file}})')
19
20 self.add_call_query = (' USING PERIODIC COMMIT 250'
21 ' LOAD CSV WITH HEADERS FROM "file:{}" AS line'
22@@ -203,7 +203,7 @@
23 # Propagate the error if there is one.
24 return False if etype is not None else True
25
26- def add_function(self, name, typ='normal'):
27+ def add_function(self, name, typ='normal', source='unknown'):
28 """
29 Add a function.
30
31@@ -219,7 +219,7 @@
32 pointer: we know only that the function exists, not its
33 name or details.
34 """
35- self.func_writer.write(name, typ)
36+ self.func_writer.write(name, typ, source)
37
38 def add_call(self, caller, callee):
39 """
40@@ -290,6 +290,7 @@
41
42 tx.append('CREATE CONSTRAINT ON (p:program) ASSERT p.name IS UNIQUE')
43 tx.append('CREATE INDEX ON :func(name)')
44+ tx.append('CREATE INDEX ON: func(file)')
45
46 # Apply the transaction.
47 tx.commit()
48
49=== modified file 'src/sextant/objdump_parser.py'
50--- src/sextant/objdump_parser.py 2014-10-23 11:15:48 +0000
51+++ src/sextant/objdump_parser.py 2014-11-05 16:09:28 +0000
52@@ -107,8 +107,8 @@
53 self._known_stubs = set()
54
55 # By default print information to stdout.
56- def print_func(name, typ):
57- print('func {:25}{}'.format(name, typ))
58+ def print_func(name, typ, source='unknown'):
59+ print('func {:25}{:15}{}'.format(name, typ, source))
60
61 def print_call(caller, callee):
62 print('call {:25}{:25}'.format(caller, callee))
63@@ -116,7 +116,6 @@
64 def print_started(parser):
65 print('parse started: {}[{}]'.format(self.path, ', '.join(self.sections)))
66
67-
68 def print_finished(parser):
69 print('parsed {} functions and {} calls'.format(self.function_count, self.call_count))
70
71@@ -134,11 +133,11 @@
72 self.function_ptr_count += 1
73 return name
74
75- def _add_function_normal(self, name):
76+ def _add_function_normal(self, name, source):
77 """
78 Add a function which we have full assembly code for.
79 """
80- self.add_function(name, 'normal')
81+ self.add_function(name, 'normal', source)
82 self.function_count += 1
83
84 def _add_function_ptr(self, name):
85@@ -171,10 +170,20 @@
86 self.started()
87
88 if self._file is not None:
89- in_section = False # if we are in one of self.sections
90- current_function = None # track the caller for function calls
91+ in_section = False # If we are in one of self.sections.
92+ current_function = None # Track the caller for function calls.
93+ to_add = False
94
95 for line in self._file:
96+ if to_add:
97+ file_line = line.startswith('/')
98+ source = line.split(':')[0] if file_line else 'unknown'
99+ self._add_function_normal(current_function, source)
100+ to_add = False
101+
102+ if file_line:
103+ continue
104+
105 if line.startswith('Disassembly'):
106 # 'Disassembly of section <name>:\n'
107 section = line.split(' ')[-1].rstrip(':\n')
108@@ -194,7 +203,8 @@
109 self._add_function_stub(current_function)
110 else:
111 current_function = function_identifier
112- self._add_function_normal(current_function)
113+ # Flag function - we look for source on the next line.
114+ to_add = True
115
116 elif 'call ' in line or 'callq ' in line:
117 # WHITESPACE to prevent picking up function names
118@@ -273,11 +283,17 @@
119 The path of the file to run objdump on.
120
121 """
122+ print('input file: {}'.format(input_file))
123 # A single section can be specified for parsing with the -j flag,
124 # but it is not obviously possible to parse multiple sections like this.
125- p = subprocess.Popen(['objdump', '-d', input_file, '--no-show-raw-insn'],
126+ p = subprocess.Popen(['objdump', '-d', input_file,
127+ '--no-show-raw-insn', '--line-numbers'],
128 stdout=subprocess.PIPE)
129- g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$'], stdin=p.stdout, stdout=subprocess.PIPE)
130+ # Egrep filters out the section headers (Disassembly of section...),
131+ # the call lines (... [l]call[q] ...), the function declarations
132+ # (... <function>:$) and the file paths (^/file_path).
133+ g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$|^/'],
134+ stdin=p.stdout, stdout=subprocess.PIPE)
135 return input_file, g.stdout
136
137
138
139=== modified file 'src/sextant/test_parser.py'
140--- src/sextant/test_parser.py 2014-10-23 11:15:48 +0000
141+++ src/sextant/test_parser.py 2014-11-05 16:09:28 +0000
142@@ -23,7 +23,7 @@
143 calls = defaultdict(list)
144
145 # set the Parser to put output in local dictionaries
146- add_function = lambda n, t: self.add_function(functions, n, t)
147+ add_function = lambda n, t, s='unknown': self.add_function(functions, n, t)
148 add_call = lambda a, b: self.add_call(calls, a, b)
149
150 p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs,
151
152=== modified file 'src/sextant/test_resources/parser_test'
153Binary files src/sextant/test_resources/parser_test 2014-10-13 14:10:01 +0000 and src/sextant/test_resources/parser_test 2014-11-05 16:09:28 +0000 differ
154=== modified file 'src/sextant/update_db.py'
155--- src/sextant/update_db.py 2014-10-17 14:20:06 +0000
156+++ src/sextant/update_db.py 2014-11-05 16:09:28 +0000
157@@ -82,10 +82,10 @@
158 print('done: {} functions and {} calls.'
159 .format(parser.function_count, parser.call_count))
160
161- parser = Parser(file_path = file_path, file_object = file_object,
162+ parser = Parser(file_path=file_path, file_object = file_object,
163 sections=[],
164- add_function = program.add_function,
165- add_call = program.add_call,
166+ add_function=program.add_function,
167+ add_call=program.add_call,
168 started=lambda parser: start_parser(program),
169 finished=lambda parser: finish_parser(parser, program))
170 parser.parse()

Subscribers

People subscribed via source and target branches