Merge lp:~ben-hutchings/ensoft-sextant/filter-search into lp:ensoft-sextant
- filter-search
- Merge into whiteline
Status: | Superseded |
---|---|
Proposed branch: | lp:~ben-hutchings/ensoft-sextant/filter-search |
Merge into: | lp:ensoft-sextant |
Prerequisite: | lp:~ben-hutchings/ensoft-sextant/autocomplete-fix |
Diff against target: |
696 lines (+239/-112) 8 files modified
resources/sextant/web/interface.html (+2/-2) src/sextant/__main__.py (+8/-5) src/sextant/db_api.py (+118/-59) src/sextant/export.py (+1/-1) src/sextant/objdump_parser.py (+82/-33) src/sextant/test_parser.py (+1/-1) src/sextant/update_db.py (+15/-8) src/sextant/web/server.py (+12/-3) |
To merge this branch: | bzr merge lp:~ben-hutchings/ensoft-sextant/filter-search |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Robert | Pending | ||
Review via email: mp+242079@code.launchpad.net |
This proposal supersedes a proposal from 2014-11-17.
This proposal has been superseded by a proposal from 2014-11-19.
Commit message
Description of the change
Function name search within the web frontend now supports extended syntax:
'<name matches>:<file path matches>'
where <name matches> and <file path matches> are each a (possibly comma-separated) list of patterns, any of which may include the wildcard '.*'. At least one of the two must be specified.
Fixed bug with inline functions being uploaded multiple times into the database.
Fixed bug with over-zealous name stripping of function identifiers.
Fixed bug by which some functions were not uploaded.
- 45. By Ben Hutchings
-
markup fixes
- 46. By Ben Hutchings
-
markups + small bug fixes - tests do not pass (though the functionality works).
- 47. By Ben Hutchings
-
tuple instead of list
- 48. By Ben Hutchings
-
merge from autocomplete-fix
- 49. By Ben Hutchings
-
another merge from autocomplete-fix
- 50. By Ben Hutchings
-
fixed bug causing extra characters to be removed from the start of symbol names
Unmerged revisions
Preview Diff
1 | === modified file 'resources/sextant/web/interface.html' |
2 | --- resources/sextant/web/interface.html 2014-11-19 10:32:48 +0000 |
3 | +++ resources/sextant/web/interface.html 2014-11-19 10:32:48 +0000 |
4 | @@ -27,8 +27,8 @@ |
5 | All functions calling specific function</option> |
6 | <option value="functions_called_by"> |
7 | All functions called by a specific function</option> |
8 | - <option value="all_call_paths"> |
9 | - All function call paths between two functions</option> |
10 | + <!--option value="all_call_paths"> REMOVED AS THIS IS SLOW FOR IOS |
11 | + All function call paths between two functions</option--> |
12 | <option value="shortest_call_path"> |
13 | Shortest path between two functions</option> |
14 | <option value="function_names"> |
15 | |
16 | === modified file 'src/sextant/__main__.py' |
17 | --- src/sextant/__main__.py 2014-10-17 15:30:14 +0000 |
18 | +++ src/sextant/__main__.py 2014-11-19 10:32:48 +0000 |
19 | @@ -127,16 +127,13 @@ |
20 | except TypeError: |
21 | alternative_name = None |
22 | |
23 | - not_object_file = args.not_object_file |
24 | - # the default is "yes, this is an object file" if not-object-file was |
25 | - # unsupplied |
26 | - |
27 | try: |
28 | update_db.upload_program(connection, |
29 | getpass.getuser(), |
30 | args.input_file, |
31 | alternative_name, |
32 | - not_object_file) |
33 | + args.not_object_file, |
34 | + args.add_file_paths) |
35 | except requests.exceptions.ConnectionError as e: |
36 | msg = 'Connection error to server {}: {}' |
37 | logging.error(msg.format(_displayable_url(args), e)) |
38 | @@ -221,6 +218,12 @@ |
39 | help='default False, if the input file is an ' |
40 | 'object to be disassembled', |
41 | action='store_true') |
42 | + parsers['add'].add_argument('--add-file-paths', |
43 | + help='default False, set to True to make objdump ' |
44 | + 'extract the file paths for each function. ' |
45 | + 'WARNING: this is SLOW for large object files, ' |
46 | + '~15 hours for IOS.', |
47 | + action='store_true') |
48 | |
49 | parsers['delete'] = subparsers.add_parser('delete-program', |
50 | help="delete a program from the database") |
51 | |
52 | === modified file 'src/sextant/db_api.py' |
53 | --- src/sextant/db_api.py 2014-11-19 10:32:48 +0000 |
54 | +++ src/sextant/db_api.py 2014-11-19 10:32:48 +0000 |
55 | @@ -159,7 +159,7 @@ |
56 | tmp_path = os.path.join(self._tmp_dir, '{}_{{}}'.format(program_name)) |
57 | |
58 | self.func_writer = CSVWriter(tmp_path.format('funcs'), |
59 | - headers=['name', 'type'], |
60 | + headers=['name', 'type', 'file'], |
61 | max_rows=5000) |
62 | self.call_writer = CSVWriter(tmp_path.format('calls'), |
63 | headers=['caller', 'callee'], |
64 | @@ -171,7 +171,7 @@ |
65 | ' WITH line, toInt(line.id) as lineid' |
66 | ' MATCH (n:program {{name: "{}"}})' |
67 | ' CREATE (n)-[:subject]->(m:func {{name: line.name,' |
68 | - ' id: lineid, type: line.type}})') |
69 | + ' id: lineid, type: line.type, file: line.file}})') |
70 | |
71 | self.add_call_query = (' USING PERIODIC COMMIT 250' |
72 | ' LOAD CSV WITH HEADERS FROM "file:{}" AS line' |
73 | @@ -203,7 +203,7 @@ |
74 | # Propagate the error if there is one. |
75 | return False if etype is not None else True |
76 | |
77 | - def add_function(self, name, typ='normal'): |
78 | + def add_function(self, name, typ='normal', source='unknown'): |
79 | """ |
80 | Add a function. |
81 | |
82 | @@ -219,7 +219,7 @@ |
83 | pointer: we know only that the function exists, not its |
84 | name or details. |
85 | """ |
86 | - self.func_writer.write(name, typ) |
87 | + self.func_writer.write(name, typ, source) |
88 | |
89 | def add_call(self, caller, callee): |
90 | """ |
91 | @@ -257,6 +257,19 @@ |
92 | remote_paths: |
93 | A list of the paths of the remote fils. |
94 | """ |
95 | + |
96 | + def try_rmdir(path): |
97 | + # Helper function to try and remove a directory, silently |
98 | + # fail if it contains files, otherwise raise the exception. |
99 | + try: |
100 | + os.rmdir(path) |
101 | + except OSError as e: |
102 | + if e.errno in [os.errno.ENOTEMPTY, os.errno.ENOENT]: |
103 | + # Files in directory or directory doesn't exist. |
104 | + pass |
105 | + else: |
106 | + raise e |
107 | + |
108 | print('Cleaning temporary files...', end='') |
109 | file_paths = list(itertools.chain(self.func_writer.file_iter(), |
110 | self.call_writer.file_iter())) |
111 | @@ -264,16 +277,9 @@ |
112 | for path in file_paths: |
113 | os.remove(path) |
114 | |
115 | - os.rmdir(self._tmp_dir) |
116 | - |
117 | - try: |
118 | - # If the parent sextant temp folder is empty, remove it. |
119 | - os.rmdir(TMP_DIR) |
120 | - except: |
121 | - # There is other stuff in TMP_DIR (i.e. from other users), so |
122 | - # leave it. |
123 | - pass |
124 | - |
125 | + try_rmdir(self._tmp_dir) |
126 | + try_rmdir(TMP_DIR) |
127 | + |
128 | self._ssh.remove_from_tmp_dir(remote_paths) |
129 | |
130 | print('done.') |
131 | @@ -290,6 +296,7 @@ |
132 | |
133 | tx.append('CREATE CONSTRAINT ON (p:program) ASSERT p.name IS UNIQUE') |
134 | tx.append('CREATE INDEX ON :func(name)') |
135 | + tx.append('CREATE INDEX ON: func(file)') |
136 | |
137 | # Apply the transaction. |
138 | tx.commit() |
139 | @@ -832,7 +839,7 @@ |
140 | result = self._db.query(q, returns=neo4jrestclient.Node) |
141 | return bool(result) |
142 | |
143 | - def get_function_names(self, program_name, search, max_funcs): |
144 | + def get_function_names(self, program_name, search=None, max_funcs=None): |
145 | """ |
146 | Execute query to retrieve a list of all functions in the program. |
147 | Any of the output names can be used verbatim in any SextantConnection |
148 | @@ -845,15 +852,82 @@ |
149 | if not validate_query(program_name): |
150 | return set() |
151 | |
152 | + limit = "LIMIT {}".format(max_funcs) if max_funcs else "" |
153 | + |
154 | if not search: |
155 | q = (' MATCH (:program {{name: "{}"}})-[:subject]->(f:func)' |
156 | - ' RETURN f.name LIMIT {}').format(program_name, max_funcs) |
157 | + ' RETURN f.name {}').format(program_name, limit) |
158 | else: |
159 | q = (' MATCH (:program {{name: "{}"}})-[:subject]->(f:func)' |
160 | - ' WHERE f.name =~ ".*{}.*" RETURN f.name LIMIT {}' |
161 | - .format(program_name, search, max_funcs)) |
162 | + ' WHERE f.name =~ ".*{}.*" RETURN f.name {}' |
163 | + .format(program_name, search, limit)) |
164 | return {func[0] for func in self._db.query(q)} |
165 | |
166 | + @staticmethod |
167 | + def get_query(identifier, search): |
168 | + """ |
169 | + Builds a filter query from a search pattern which may contain commas |
170 | + and/or wildcards. |
171 | + |
172 | + Return: |
173 | + string: part of a valid cypher query. |
174 | + Arguments: |
175 | + identifier: |
176 | + The identifier of the node whose properties to filter on, |
177 | + e.g. 'f' after a 'MATCH (f:func) ...' |
178 | + search: |
179 | + The pattern to build the search from, of form: |
180 | + '<name patterns>:<path patterns>' |
181 | + where patterns are possibly empty, possibly comma separated |
182 | + lists of strings, which will be compared to the 'name' and |
183 | + 'file' (path) attributes of 'identifier'. |
184 | + |
185 | + These strings may contain wildcards: e.g: |
186 | + .*substring.* |
187 | + sub.*string |
188 | + etc. |
189 | + |
190 | + """ |
191 | + if ':' in search: |
192 | + func_subs, file_subs = search.split(':') |
193 | + else: |
194 | + func_subs, file_subs = search, '' |
195 | + |
196 | + # Remove empty strings. |
197 | + func_subs = [sub for sub in func_subs.split(',') if sub] |
198 | + file_subs = [sub for sub in file_subs.split(',') if sub] |
199 | + |
200 | + # Cases for search: |
201 | + # <specific name>:<redundant stuff> |
202 | + # <wildcard name>:<specific filepath> |
203 | + # <wildcard name>:<wildcard filepath> |
204 | + |
205 | + query_str = "" |
206 | + |
207 | + def get_list(subs): |
208 | + return '[{}]'.format(','.join("'{}'".format(s) for s in subs)) |
209 | + |
210 | + |
211 | + if func_subs and not any('*' in sub for sub in func_subs): |
212 | + # List of specific functions. Don't care about anything after ':' |
213 | + query_str += ('USING INDEX {0}:func(name) WHERE {0}.name IN {1} ' |
214 | + .format(identifier, get_list(func_subs))) |
215 | + else: |
216 | + if file_subs and not any('*' in sub for sub in file_subs): |
217 | + # Specific file to look in. |
218 | + query_str = ('USING INDEX {0}.func(file) WHERE {0}.file IN {1} ' |
219 | + .format(identifier, get_list(file_subs))) |
220 | + elif file_subs: |
221 | + query_str = ('WHERE ANY (s_file IN {} WHERE {}.file =~ s_file) ' |
222 | + .format(get_list(file_subs), identifier)) |
223 | + |
224 | + if func_subs: |
225 | + query_str += 'AND ' if file_subs else 'WHERE ' |
226 | + query_str += ('ANY (s_name IN {} WHERE {}.name =~ s_name) ' |
227 | + .format(get_list(func_subs), identifier)) |
228 | + |
229 | + return query_str |
230 | + |
231 | def get_all_functions_called(self, program_name, function_calling): |
232 | """ |
233 | Execute query to find all functions called by a function (indirectly). |
234 | @@ -863,14 +937,9 @@ |
235 | :param function_calling: string name of a function whose children to find |
236 | :return: FunctionQueryResult, maximal subgraph rooted at function_calling |
237 | """ |
238 | - |
239 | - if not self.check_function_exists(program_name, function_calling): |
240 | - return None |
241 | - |
242 | - q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})' |
243 | - ' USING INDEX f:func(name)' |
244 | - ' MATCH (f)-[:calls*]->(g) RETURN distinct f, g' |
245 | - .format(program_name, function_calling)) |
246 | + q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func) {}' |
247 | + ' MATCH (f)-[:calls]->(g:func) RETURN distinct f, g' |
248 | + .format(program_name, SextantConnection.get_query('f', function_calling))) |
249 | |
250 | return self._execute_query(program_name, q) |
251 | |
252 | @@ -884,14 +953,10 @@ |
253 | :return: FunctionQueryResult, maximal connected subgraph with leaf function_called |
254 | """ |
255 | |
256 | - if not self.check_function_exists(program_name, function_called): |
257 | - return None |
258 | - |
259 | - q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func {{name: "{}"}})' |
260 | - ' USING INDEX g:func(name)' |
261 | - ' MATCH (f)-[:calls*]->(g) WHERE f.name <> "{}"' |
262 | - ' RETURN distinct f , g') |
263 | - q = q.format(program_name, function_called, program_name) |
264 | + q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func) {}' |
265 | + ' MATCH (f)-[:calls]->(g)' |
266 | + ' RETURN distinct f, g') |
267 | + q = q.format(program_name, SextantConnection.get_query('g', function_called), program_name) |
268 | |
269 | return self._execute_query(program_name, q) |
270 | |
271 | @@ -910,22 +975,17 @@ |
272 | if not self.check_program_exists(program_name): |
273 | return None |
274 | |
275 | - if not self.check_function_exists(program_name, function_called): |
276 | - return None |
277 | - |
278 | - if not self.check_function_exists(program_name, function_calling): |
279 | - return None |
280 | - |
281 | - q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(start:func {{name: "{}"}})' |
282 | - ' USING INDEX start:func(name)' |
283 | - ' MATCH (p)-[:subject]->(end:func {{name: "{}"}})' |
284 | - ' USING INDEX end:func(name)' |
285 | + start_q = SextantConnection.get_query('start', function_calling) |
286 | + end_q = SextantConnection.get_query('end', function_called) |
287 | + |
288 | + q = (' MATCH (p:program {{name: "{}"}})' |
289 | + ' MATCH (p)-[:subject]->(start:func) {} WITH start, p' |
290 | + ' MATCH (p)-[:subject]->(end:func) {} WITH start, end' |
291 | ' MATCH path=(start)-[:calls*]->(end)' |
292 | ' WITH DISTINCT nodes(path) AS result' |
293 | ' UNWIND result AS answer' |
294 | ' RETURN answer') |
295 | - q = q.format(program_name, function_calling, function_called) |
296 | - |
297 | + q = q.format(program_name, start_q, end_q) |
298 | return self._execute_query(program_name, q) |
299 | |
300 | def get_whole_program(self, program_name): |
301 | @@ -942,7 +1002,7 @@ |
302 | ' RETURN (f)'.format(program_name)) |
303 | return self._execute_query(program_name, q) |
304 | |
305 | - def get_shortest_path_between_functions(self, program_name, func1, func2): |
306 | + def get_shortest_path_between_functions(self, program_name, function_calling, function_called): |
307 | """ |
308 | Execute query to get a single, shortest, path between two functions. |
309 | :param program_name: string name of the program we wish to search under |
310 | @@ -953,17 +1013,16 @@ |
311 | if not self.check_program_exists(program_name): |
312 | return None |
313 | |
314 | - if not self.check_function_exists(program_name, func1): |
315 | - return None |
316 | - |
317 | - if not self.check_function_exists(program_name, func2): |
318 | - return None |
319 | - |
320 | - q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(f:func {{name: "{}"}})' |
321 | - ' USING INDEX f:func(name)' |
322 | - ' MATCH (p)-[:subject]->(g:func {{name: "{}"}})' |
323 | - ' MATCH path=shortestPath((f)-[:calls*]->(g))' |
324 | - ' UNWIND nodes(path) AS ans' |
325 | - ' RETURN ans'.format(program_name, func1, func2)) |
326 | + start_q = SextantConnection.get_query('start', function_calling) |
327 | + end_q = SextantConnection.get_query('end', function_called) |
328 | + |
329 | + q = (' MATCH (p:program {{name: "{}"}})' |
330 | + ' MATCH (p)-[:subject]->(start:func) {} WITH start, p' |
331 | + ' MATCH (p)-[:subject]->(end:func) {} WITH start, end' |
332 | + ' MATCH path=shortestPath((start)-[:calls*]->(end))' |
333 | + ' UNWIND nodes(path) AS answer' |
334 | + ' RETURN answer') |
335 | + q = q.format(program_name, start_q, end_q) |
336 | |
337 | return self._execute_query(program_name, q) |
338 | + |
339 | |
340 | === modified file 'src/sextant/export.py' |
341 | --- src/sextant/export.py 2014-10-13 14:58:12 +0000 |
342 | +++ src/sextant/export.py 2014-11-19 10:32:48 +0000 |
343 | @@ -48,7 +48,7 @@ |
344 | for func in program.get_functions(): |
345 | if func.type == "stub": |
346 | output_str += ' "{}" [fillcolor=pink, style=filled]\n'.format(func.name) |
347 | - elif func.type == "function_pointer": |
348 | + elif func.type == "pointer": |
349 | output_str += ' "{}" [fillcolor=yellow, style=filled]\n'.format(func.name) |
350 | |
351 | # in all cases, even if we've specified that we want a filled-in |
352 | |
353 | === modified file 'src/sextant/objdump_parser.py' (properties changed: +x to -x) |
354 | --- src/sextant/objdump_parser.py 2014-10-23 11:15:48 +0000 |
355 | +++ src/sextant/objdump_parser.py 2014-11-19 10:32:48 +0000 |
356 | @@ -42,9 +42,12 @@ |
357 | The number of function calls that have been parsed. |
358 | function_ptr_count: |
359 | The number of function pointers that have been detected. |
360 | - _known_stubs: |
361 | - A set of the names of functions with type 'stub' that have been |
362 | - parsed - used to avoid registering a stub multiple times. |
363 | + _known_functions: |
364 | + A set of the names of functions that have been |
365 | + parsed - used to avoid registering a function multiple times. |
366 | + _partial_functions: |
367 | + A set of functions whose names we have seen but whose source |
368 | + files we don't yet know. |
369 | |
370 | """ |
371 | def __init__(self, file_path, file_object=None, |
372 | @@ -102,13 +105,14 @@ |
373 | self.call_count = 0 |
374 | self.function_ptr_count = 0 |
375 | |
376 | - # Avoid adding duplicate function stubs (as these are detected from |
377 | - # function calls so may be repeated). |
378 | - self._known_stubs = set() |
379 | + # Avoid adding duplicate functions. |
380 | + self._known_functions = set() |
381 | + # Set of partially-parsed functions. |
382 | + self._partial_functions = set() |
383 | |
384 | # By default print information to stdout. |
385 | - def print_func(name, typ): |
386 | - print('func {:25}{}'.format(name, typ)) |
387 | + def print_func(name, typ, source='unknown'): |
388 | + print('func {:25}{:15}{}'.format(name, typ, source)) |
389 | |
390 | def print_call(caller, callee): |
391 | print('call {:25}{:25}'.format(caller, callee)) |
392 | @@ -116,7 +120,6 @@ |
393 | def print_started(parser): |
394 | print('parse started: {}[{}]'.format(self.path, ', '.join(self.sections))) |
395 | |
396 | - |
397 | def print_finished(parser): |
398 | print('parsed {} functions and {} calls'.format(self.function_count, self.call_count)) |
399 | |
400 | @@ -134,12 +137,32 @@ |
401 | self.function_ptr_count += 1 |
402 | return name |
403 | |
404 | - def _add_function_normal(self, name): |
405 | - """ |
406 | - Add a function which we have full assembly code for. |
407 | - """ |
408 | - self.add_function(name, 'normal') |
409 | - self.function_count += 1 |
410 | + def _add_function(self, name, source=None): |
411 | + """ |
412 | + Add a partially known or fully known function. |
413 | + """ |
414 | + if source is None: |
415 | + # Partial definition - if do not already have a full definition |
416 | + # for this name then add it to the partials set. |
417 | + if not name in self._known_functions: |
418 | + self._partial_functions.add(name) |
419 | + elif source == 'unknown': |
420 | + # Manually adding a stub function. |
421 | + self.add_function(name, 'stub', source) |
422 | + self.function_count += 1 |
423 | + elif name not in self._known_functions: |
424 | + # A full definition - either upgrade from partial function |
425 | + # to known function, or add directly to known functions |
426 | + # (otherwise we have already seen it) |
427 | + |
428 | + try: |
429 | + self._partial_functions.remove(name) |
430 | + except KeyError: |
431 | + pass |
432 | + |
433 | + self._known_functions.add(name) |
434 | + self.add_function(name, 'normal', source) |
435 | + self.function_count += 1 |
436 | |
437 | def _add_function_ptr(self, name): |
438 | """ |
439 | @@ -148,15 +171,6 @@ |
440 | self.add_function(name, 'pointer') |
441 | self.function_count += 1 |
442 | |
443 | - def _add_function_stub(self, name): |
444 | - """ |
445 | - Add a function stub - we have its name but none of its internals. |
446 | - """ |
447 | - if not name in self._known_stubs: |
448 | - self._known_stubs.add(name) |
449 | - self.add_function(name, 'stub') |
450 | - self.function_count += 1 |
451 | - |
452 | def _add_call(self, caller, callee): |
453 | """ |
454 | Add a function call from caller to callee. |
455 | @@ -171,10 +185,20 @@ |
456 | self.started() |
457 | |
458 | if self._file is not None: |
459 | - in_section = False # if we are in one of self.sections |
460 | - current_function = None # track the caller for function calls |
461 | + in_section = False # If we are in one of self.sections. |
462 | + current_function = None # Track the caller for function calls. |
463 | + to_add = False |
464 | |
465 | for line in self._file: |
466 | + if to_add: |
467 | + file_line = line.startswith('/') |
468 | + source = line.split(':')[0] if file_line else None |
469 | + self._add_function(current_function, source) |
470 | + to_add = False |
471 | + |
472 | + if file_line: |
473 | + continue |
474 | + |
475 | if line.startswith('Disassembly'): |
476 | # 'Disassembly of section <name>:\n' |
477 | section = line.split(' ')[-1].rstrip(':\n') |
478 | @@ -189,12 +213,19 @@ |
479 | # <function_name>[@plt] |
480 | function_identifier = line.split('<')[-1].split('>')[0] |
481 | |
482 | + # IOS builds add a __be_ (big endian) prefix to all functions, |
483 | + # get rid of it if it is there, |
484 | + if function_identifier.startswith('__be_'): |
485 | + function_identifier = function_identifier.lstrip('__be_') |
486 | + |
487 | if '@' in function_identifier: |
488 | + # Of form <function name>@<other stuff>. |
489 | current_function = function_identifier.split('@')[0] |
490 | - self._add_function_stub(current_function) |
491 | + self._add_function(current_function) |
492 | else: |
493 | current_function = function_identifier |
494 | - self._add_function_normal(current_function) |
495 | + # Flag function - we look for source on the next line. |
496 | + to_add = True |
497 | |
498 | elif 'call ' in line or 'callq ' in line: |
499 | # WHITESPACE to prevent picking up function names |
500 | @@ -213,9 +244,12 @@ |
501 | # from which we extract name |
502 | callee_is_ptr = False |
503 | function_identifier = callee_info.lstrip('<').rstrip('>\n') |
504 | + if function_identifier.startswith('__be_'): |
505 | + function_identifier = function_identifier.lstrip('__be_') |
506 | + |
507 | if '@' in function_identifier: |
508 | callee = function_identifier.split('@')[0] |
509 | - self._add_function_stub(callee) |
510 | + self._add_function(callee) |
511 | else: |
512 | callee = function_identifier.split('-')[-1].split('+')[0] |
513 | # Do not add this fn now - it is a normal func |
514 | @@ -231,6 +265,10 @@ |
515 | # Add the call. |
516 | if not (self.ignore_ptrs and callee_is_ptr): |
517 | self._add_call(current_function, callee) |
518 | + |
519 | + for name in self._partial_functions: |
520 | + self._add_function(name, 'unknown') |
521 | + |
522 | |
523 | self.finished() |
524 | |
525 | @@ -261,7 +299,7 @@ |
526 | return result |
527 | |
528 | |
529 | -def run_objdump(input_file): |
530 | +def run_objdump(input_file, add_file_paths=False): |
531 | """ |
532 | Run the objdump command on the file with the given path. |
533 | |
534 | @@ -271,13 +309,24 @@ |
535 | Arguments: |
536 | input_file: |
537 | The path of the file to run objdump on. |
538 | + add_file_paths: |
539 | + Whether to call with -l option to extract line numbers and source |
540 | + files from the binary. VERY SLOW on large binaries (~15 hours for ios). |
541 | |
542 | """ |
543 | + print('input file: {}'.format(input_file)) |
544 | # A single section can be specified for parsing with the -j flag, |
545 | # but it is not obviously possible to parse multiple sections like this. |
546 | - p = subprocess.Popen(['objdump', '-d', input_file, '--no-show-raw-insn'], |
547 | - stdout=subprocess.PIPE) |
548 | - g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$'], stdin=p.stdout, stdout=subprocess.PIPE) |
549 | + args = ['objdump', '-d', input_file, '--no-show-raw-insn'] |
550 | + if add_file_paths: |
551 | + args += ['--line-numbers'] |
552 | + |
553 | + p = subprocess.Popen(args, stdout=subprocess.PIPE) |
554 | + # Egrep filters out the section headers (Disassembly of section...), |
555 | + # the call lines (... [l]call[q] ...), the function declarations |
556 | + # (... <function>:$) and the file paths (^/file_path). |
557 | + g = subprocess.Popen(['egrep', 'Disassembly|call(q)? |>:$|^/'], |
558 | + stdin=p.stdout, stdout=subprocess.PIPE) |
559 | return input_file, g.stdout |
560 | |
561 | |
562 | |
563 | === modified file 'src/sextant/test_parser.py' |
564 | --- src/sextant/test_parser.py 2014-10-23 11:15:48 +0000 |
565 | +++ src/sextant/test_parser.py 2014-11-19 10:32:48 +0000 |
566 | @@ -23,7 +23,7 @@ |
567 | calls = defaultdict(list) |
568 | |
569 | # set the Parser to put output in local dictionaries |
570 | - add_function = lambda n, t: self.add_function(functions, n, t) |
571 | + add_function = lambda n, t, s='unknown': self.add_function(functions, n, t) |
572 | add_call = lambda a, b: self.add_call(calls, a, b) |
573 | |
574 | p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs, |
575 | |
576 | === modified file 'src/sextant/test_resources/parser_test' |
577 | Binary files src/sextant/test_resources/parser_test 2014-10-13 14:10:01 +0000 and src/sextant/test_resources/parser_test 2014-11-19 10:32:48 +0000 differ |
578 | === modified file 'src/sextant/update_db.py' |
579 | --- src/sextant/update_db.py 2014-10-17 14:20:06 +0000 |
580 | +++ src/sextant/update_db.py 2014-11-19 10:32:48 +0000 |
581 | @@ -20,7 +20,7 @@ |
582 | import logging |
583 | |
584 | def upload_program(connection, user_name, file_path, program_name=None, |
585 | - not_object_file=False): |
586 | + not_object_file=False, add_file_paths=False): |
587 | """ |
588 | Upload a program's functions and call graph to the database. |
589 | |
590 | @@ -38,6 +38,9 @@ |
591 | not_object_file: |
592 | Flag controlling whether file_path is pointing to a dump file or |
593 | a binary file. |
594 | + add_file_paths: |
595 | + Flag controlling whether to call objdump with the -l option to |
596 | + extract line numbers and source files. VERY SLOW on large binaries. |
597 | """ |
598 | if not connection._ssh: |
599 | raise SSHConnectionError('An SSH connection is required for ' |
600 | @@ -59,9 +62,9 @@ |
601 | start = time() |
602 | |
603 | if not not_object_file: |
604 | - print('Generating dump file...', end='') |
605 | + print('Generating dump file with{} file paths...'.format(('out', '')[add_file_paths]), end='') |
606 | sys.stdout.flush() |
607 | - file_path, file_object = run_objdump(file_path) |
608 | + file_path, file_object = run_objdump(file_path, add_file_paths) |
609 | print('done.') |
610 | else: |
611 | file_object = None |
612 | @@ -82,15 +85,19 @@ |
613 | print('done: {} functions and {} calls.' |
614 | .format(parser.function_count, parser.call_count)) |
615 | |
616 | - parser = Parser(file_path = file_path, file_object = file_object, |
617 | + parser = Parser(file_path=file_path, file_object = file_object, |
618 | sections=[], |
619 | - add_function = program.add_function, |
620 | - add_call = program.add_call, |
621 | + add_function=program.add_function, |
622 | + add_call=program.add_call, |
623 | started=lambda parser: start_parser(program), |
624 | finished=lambda parser: finish_parser(parser, program)) |
625 | + |
626 | parser.parse() |
627 | - |
628 | - program.commit() |
629 | + |
630 | + if parser.function_count == 0: |
631 | + print('Nothing to upload. Did you mean to add the --not-object-file flag?') |
632 | + else: |
633 | + program.commit() |
634 | |
635 | end = time() |
636 | print('Finished in {:.2f}s.'.format(end-start)) |
637 | |
638 | === modified file 'src/sextant/web/server.py' |
639 | --- src/sextant/web/server.py 2014-11-19 10:32:48 +0000 |
640 | +++ src/sextant/web/server.py 2014-11-19 10:32:48 +0000 |
641 | @@ -13,6 +13,8 @@ |
642 | from twisted.internet.threads import deferToThread |
643 | from twisted.internet import defer |
644 | |
645 | +from neo4jrestclient.exceptions import TransactionException |
646 | + |
647 | import logging |
648 | import os |
649 | import json |
650 | @@ -24,6 +26,8 @@ |
651 | import tempfile |
652 | import subprocess |
653 | |
654 | +from datetime import datetime |
655 | + |
656 | from cgi import escape # deprecated in Python 3 in favour of html.escape, but we're stuck on Python 2 |
657 | |
658 | # global SextantConnection object which deals with the port forwarding |
659 | @@ -174,13 +178,15 @@ |
660 | # if we are okay here we have a valid query with all required arguments |
661 | if res_code is RESPONSE_CODE_OK: |
662 | try: |
663 | + print('running query {}'.format(datetime.now())) |
664 | program = yield defer_to_thread_with_timeout(render_timeout, fn, |
665 | name, *req_args) |
666 | - except defer.CancelledError: |
667 | + print('\tdone {}'.format(datetime.now())) |
668 | + except Exception as e: |
669 | # the timeout has fired and cancelled the request |
670 | res_code = RESPONSE_CODE_BAD_REQUEST |
671 | - res_fmt = "The request timed out after {} seconds." |
672 | - res_msg = res_fmt.format(render_timeout) |
673 | + res_msg = "{}".format(e) |
674 | + print('\tfailed {}'.format(datetime.now())) |
675 | |
676 | if res_code is RESPONSE_CODE_OK: |
677 | # we have received a response to our request |
678 | @@ -201,10 +207,12 @@ |
679 | suppress_common = suppress_common_arg in ('null', 'true') |
680 | |
681 | # we have a non-empty return - render it |
682 | + print('getting plot {}'.format(datetime.now())) |
683 | res_msg = yield deferToThread(self.get_plot, program, |
684 | suppress_common, |
685 | remove_self_calls=False) |
686 | request.setHeader('content-type', 'image/svg+xml') |
687 | + print('\tdone {}'.format(datetime.now())) |
688 | |
689 | request.setResponseCode(res_code) |
690 | request.write(res_msg) |
691 | @@ -229,6 +237,7 @@ |
692 | max_funcs = AUTOCOMPLETE_NAMES_LIMIT + 1 |
693 | programs = CONNECTION.programs_with_metadata() |
694 | result = CONNECTION.get_function_names(program_name, search, max_funcs) |
695 | + print(search, len(result)) |
696 | return result if len(result) < max_funcs else set() |
697 | |
698 |