Sextant

Merge lp:~ben-hutchings/ensoft-sextant/wierd-names-clean into lp:ensoft-sextant

wierd-names-clean
Merge into whiteline

Proposed by Ben Hutchings on 2014-11-25

Status:	Merged
Approved by:	Robert on 2014-12-12
Approved revision:	69
Merged at revision:	36
Proposed branch:	lp:~ben-hutchings/ensoft-sextant/wierd-names-clean
Merge into:	lp:ensoft-sextant
Prerequisite:	lp:~ben-hutchings/ensoft-sextant/rel-merge
Diff against target:	474 lines (+190/-48) 10 files modified src/sextant/db_api.py (+7/-3) src/sextant/objdump_parser.py (+22/-8) src/sextant/test_all.py (+39/-0) src/sextant/test_all.sh (+0/-4) src/sextant/test_db.py (+39/-13) src/sextant/test_parser.py (+36/-10) src/sextant/test_resources/parser_file2.c (+24/-0) src/sextant/test_resources/parser_header.h (+10/-0) src/sextant/test_resources/parser_test.c (+3/-9) src/sextant/test_sshmanager.py (+10/-1)
To merge this branch:	bzr merge lp:~ben-hutchings/ensoft-sextant/wierd-names-clean
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Robert		2014-11-25	Approve on 2014-12-12
Review via email: mp+242752@code.launchpad.net

This proposal supersedes a proposal from 2014-11-25.

Commit message

Function names are now cleaned up by a helper function, which removes __be_ prefixes if they are there (bi-endian builds) and converts names like <name>.<other stuff> to just <name>.

Tests extended to check that this works.

Description of the change

Function names are now cleaned up by a helper function, which removes __be_ prefixes if they are there (bi-endian builds) and converts names like <name>.<other stuff> to just <name>.

Tests extended to check that this works.

lp:~ben-hutchings/ensoft-sextant/wierd-names-clean updated on 2014-12-12

64. By Ben Hutchings on 2014-11-25: merge from rel-merge markups
65. By Ben Hutchings on 2014-11-25: extended tests, replaced test_all.sh with test_all.py which also generates code coverage reports for the tested modules
66. By Ben Hutchings on 2014-11-25: whitespace fix
67. By Ben Hutchings on 2014-11-28: fixed bug with get_all_functions_calling
68. By Ben Hutchings on 2014-12-08: very minor change to the default objdump_parser print output format to include more information added since this was originally written
69. By Ben Hutchings on 2014-12-12: doc comment fix

Revision history for this message

Robert (rjwills) on 2014-12-12:

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk

Subscribers

People subscribed via source and target branches

to all changes:

Ben Hutchings

Ensoft Patch Lander

Patrick Stevens

 === modified file 'src/sextant/db_api.py'
 --- src/sextant/db_api.py	2014-11-25 11:40:03 +0000
 +++ src/sextant/db_api.py	2014-12-12 11:30:28 +0000
@@ -996,7 +996,7 @@
          return self._execute_query(program_name, q)
--    def get_all_functions_calling(self, program_name, function_called,
++    def get_all_functions_calling(self, program_name, function_calling,
                                    limit_internal=False, max_depth=1):
          """
          Return functions calling the specified functions.
@@ -1014,12 +1014,16 @@
              program_name:
                  The name of the program to query.
--            function_called:
++            function_calling:
                  A string of form  <name_match>:<file_match>, where at least
                  one of name_match and file_match is provided, and each may be a
                  comma separated list of strings containing wildcard '.*'
                  sequences. Specifies the list of functions to match.
++                NOTE: the name is a bit of a hack to work with the javascript.
++                Should really be function_called but that would require more
++                js fiddling.
++
              limit_internal:
                  If true, only explore internal calls.
@@ -1034,7 +1038,7 @@
          q = (' MATCH (p:program {{name: "{}"}})-[:subject]->(g:func) {}'
               ' MATCH (f)-[{}*0..{}]->(g)'
               ' RETURN distinct f, g')
--        q = q.format(program_name, SextantConnection.get_query('g', function_called),
++        q = q.format(program_name, SextantConnection.get_query('g', function_calling),
                       ':internal' if limit_internal else ':internal|external',
                       max_depth or '')
 === modified file 'src/sextant/objdump_parser.py'
 --- src/sextant/objdump_parser.py	2014-11-25 11:40:03 +0000
 +++ src/sextant/objdump_parser.py	2014-12-12 11:30:28 +0000
@@ -121,7 +121,7 @@
              print('func {:25} {:15}{}'.format(name, typ, source))
          def print_call(caller, callee, is_internal):
--            print('call {:25} {:25}'.format(caller, callee))
++            print('call {} {:25} {:25}'.format('EI'[is_internal], caller, callee))
          def print_started(parser):
              print('parse started: {}[{}]'.format(self.path, ', '.join(self.sections)))
@@ -199,10 +199,27 @@
                  self.add_call(caller, callee, False)
                  self._known_calls.add((caller, callee))
                  self.call_count += 1
--                print(caller, callee)
              else:
                  self._partial_calls.add((caller, callee))
++    @staticmethod
++    def clean_id(function_identifier):
++        """
++        Clean the funciton identifier string.
++        """
++        # Bi-endian builds add a __be_ prefix to all functions,
++        # get rid of it if it is there,
++        if function_identifier.startswith('__be_'):
++            function_identifier = function_identifier[len('__be_'):]
++
++        # Some functions look like <identifier>. or <identifier>..<digit>
++        # - get rid of the extra bits here:
++        if '.' in function_identifier:
++            function_identifier = function_identifier.split('.')[0]
++
++        return function_identifier
++
++
      def parse(self):
          """
          Parse self._file.
@@ -242,10 +259,7 @@
                          # <function_name>[@plt]
                          function_identifier = line.split('<')[-1].split('>')[0]
--                        # IOS builds add a __be_ (big endian) prefix to all functions,
--                        # get rid of it if it is there,
--                        if function_identifier.startswith('__be_'):
--                            function_identifier = function_identifier[len('__be_'):]
++                        function_identifier = self.clean_id(function_identifier)
                          if '@' in function_identifier:
                              # Of form <function name>@<other stuff>.
@@ -277,8 +291,8 @@
                              # from which we extract name
                              callee_is_ptr = False
                              function_identifier = callee_info.lstrip('<').rstrip('>\n')
--                            if function_identifier.startswith('__be_'):
--                                function_identifier = function_identifier[len('__be_'):]
++
++                            function_identifier = self.clean_id(function_identifier)
                              if '@' in function_identifier:
                                  callee = function_identifier.split('@')[0]
 === added file 'src/sextant/test_all.py'
 --- src/sextant/test_all.py	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_all.py	2014-12-12 11:30:28 +0000
@@ -0,0 +1,39 @@
++#!/usr/bin/python
++
++from __future__ import print_function
++
++import subprocess
++import shlex
++
++tests = (('test_parser.py', 'objdump_parser.py'),
++         ('test_csvwriter.py', 'csvwriter.py'),
++         ('test_sshmanager.py', 'sshmanager.py'),
++         ('test_db.py', 'db_api.py'))
++
++pycmd = 'coverage run {}'
++covcmd = 'coverage report -m {}'
++
++if __name__ == '__main__':
++    Popen = subprocess.Popen
++
++    for test, name in tests:
++        print('Running tests: {}'.format(test))
++        do_print = False
++
++
++        pyproc = Popen(shlex.split(pycmd.format(test)), stdout=subprocess.PIPE)
++        for line in pyproc.stdout:
++            if '----------' in line or '=========' in line:
++                do_print = True
++
++            if do_print:
++                print(line.rstrip())
++
++        covproc = Popen(shlex.split(covcmd.format(name)))
++        covproc.wait()
++
++
++
++
++
++
 === removed file 'src/sextant/test_all.sh'
 --- src/sextant/test_all.sh	2014-10-13 16:01:59 +0000
 +++ src/sextant/test_all.sh	1970-01-01 00:00:00 +0000
@@ -1,4 +0,0 @@
--#!/usr/bin/bash
--
--PYTHONPATH=$PYTHONPATH:~/.
--python -m unittest discover --pattern=test_*.py
 === modified file 'src/sextant/test_db.py'
 --- src/sextant/test_db.py	2014-11-25 11:40:03 +0000
 +++ src/sextant/test_db.py	2014-12-12 11:30:28 +0000
@@ -12,7 +12,7 @@
  import update_db
  PNAME = 'tester-parser_test'
--NORMAL = {'main', 'normal', 'wierd$name', 'duplicates'}
++NORMAL = {'main', 'normal', 'wierd$name', 'duplicates', 'name', 'puts'}
  class TestFunctionQueryResults(unittest.TestCase):
@@ -37,10 +37,12 @@
          names = get_names(PNAME)
          # Test file wildcard search
          parser_names = get_names(PNAME, search=':.*parser_test.c')
++        file2_names = get_names(PNAME, search=':.*parser_file2.c')
          self.assertTrue(names.issuperset(NORMAL))
--        self.assertEquals(len(names), 24)
--        self.assertEquals(parser_names, {u'main', u'normal', u'duplicates', u'wierd$name'})
++        self.assertEquals(len(names), 27)
++        self.assertEquals(parser_names, {u'main', u'normal', u'duplicates'})
++        self.assertEquals(file2_names, {u'wierd$name', u'name'})
          # Test the wildcard matching
          search = self.connection.get_function_names(PNAME, search='.*libc.*')
@@ -58,13 +60,12 @@
      def test_get_all_functions_called(self):
          get_fns = self.connection.get_all_functions_called
--        for depth, num in zip([0, 1, 2, 3], [8, 3, 8, 8]):
++        for depth, num in zip(range(5), [11, 3, 9, 10, 11]):
              result = get_fns(PNAME, 'main', False, depth).functions
--            self.assertEquals(len(result), num, str(result))
++            self.assertEquals(len(result), num)
--        for depth, num in zip([0, 1, 2, 3], [8, 4, 8, 8]):
++        for depth, num in zip(range(5), [9, 6, 9, 9, 9]):
              # Limit to internal functions
--            # TODO this isn't a great test - need greater call depth
              result = get_fns(PNAME, 'main', True, depth).functions
              self.assertEquals(len(result), num)
@@ -80,18 +81,43 @@
              self.assertEquals(len(result), num)
      def test_get_all_paths_between(self):
--       get_paths = self.connection.get_call_paths
--
--       result = {f.name for f in get_paths(PNAME, 'main', 'wierd$name', True, 0).functions}
--       exp = {'main', 'normal', 'duplicates', 'wierd$name'}
--       self.assertEquals(result, exp)
++        get_paths = self.connection.get_call_paths
++
++        result = {f.name for f in get_paths(PNAME, 'main', 'wierd$name', False, 0).functions}
++        exp = {'main', 'normal', 'duplicates', 'wierd$name'}
++        self.assertEquals(result, exp)
++
++        self.assertFalse(get_paths(PNAME, 'main', 'wierd$name', True, 0).functions)
      def test_get_shortest_paths_between(self):
          get_paths = self.connection.get_shortest_path_between_functions
--        result = {f.name for f in get_paths(PNAME, 'main', 'wierd$name', True, 0).functions}
++        result = {f.name for f in get_paths(PNAME, 'main', 'wierd$name', False, 0).functions}
          exp = {u'main', u'normal', u'wierd$name'}
          self.assertEquals(result, exp)
++
++        self.assertFalse(get_paths(PNAME, 'main', 'wierd$name', True, 0).functions)
++
++    def test_programs_with_metadata(self):
++        result = self.connection.programs_with_metadata()
++        found = False
++
++        for program in result:
++            if program.program_name == PNAME:
++                found = True
++
++                self.assertEquals(program.number_of_funcs, 27)
++                self.assertEquals(program.number_of_calls, 26)
++
++                break
++
++        self.assertTrue(found)
++
++    def test_get_whole_program(self):
++        result = self.connection.get_whole_program(PNAME)
++        self.assertEquals(len(result.functions), 27)
++
++        self.assertFalse(self.connection.get_whole_program('no such program'))
  if __name__ == '__main__':
      unittest.main()
 === modified file 'src/sextant/test_parser.py'
 --- src/sextant/test_parser.py	2014-11-20 17:25:59 +0000
 +++ src/sextant/test_parser.py	2014-12-12 11:30:28 +0000
@@ -5,6 +5,7 @@
  import objdump_parser as parser
++OBJ_FILE = 'test_resources/parser_test'
  DUMP_FILE = 'test_resources/parser_test.dump'
  class TestSequence(unittest.TestCase):
@@ -12,7 +13,8 @@
          pass
      def add_function(self, dct, name, typ, source):
--        self.assertFalse(name in dct, "duplicate function added: {} into {}".format(name, dct.keys()))
++        self.assertFalse(name in dct, ("duplicate function added: {} into {}"
++                                       .format(name, dct.keys())))
          dct[name] = (typ, source)
      def add_call(self, dct, caller, callee, is_internal):
@@ -26,7 +28,9 @@
          add_function = lambda n, t, s: self.add_function(functions, n, t, s)
          add_call = lambda a, b, i: self.add_call(calls, a, b, i)
--        p = parser.Parser(path, sections=sections, ignore_ptrs=ignore_ptrs,
++        file_path, file_object = parser.run_objdump(OBJ_FILE, add_file_paths=True)
++        p = parser.Parser(file_path=None, file_object=file_object,
++                          sections=sections, ignore_ptrs=ignore_ptrs,
                            add_function=add_function, add_call=add_call)
          res = p.parse()
@@ -44,22 +48,26 @@
          res, funcs, calls = self.do_parse()
          known = 'parser_test.c'
++        files = 'parser_file2.c'
          unknown = 'unknown'
--        for name, typ, fle in zip(['normal', 'duplicates', 'wierd$name', 'printf', 'func_ptr_3'],
--                                  ['normal', 'normal', 'normal', 'stub', 'pointer'],
--                                  [known, known, known, unknown, unknown]):
++        for name, typ, fle in zip(['normal', 'duplicates', 'wierd$name', 'printf', 'func_ptr_3',
++                                   'name', 'puts', 'inl_func'],
++                                  ['normal', 'normal', 'normal', 'stub', 'pointer',
++                                   'normal', 'stub', 'normal'],
++                                  [known, known, files, unknown, unknown,
++                                   files, unknown, 'parser_header.h']):
              self.assertTrue(name in funcs, "'{}' not found in function dictionary".format(name))
              self.assertEquals(funcs[name][0], typ)
--            self.assertTrue(funcs[name][1].endswith(fle))
++            self.assertTrue(funcs[name][1].endswith(fle), "{}-{}".format(name, fle))
      def test_no_ptrs(self):
          # ensure that the ignore_ptrs flags is working
          res, funcs, calls = self.do_parse(ignore_ptrs=True)
--        self.assertFalse('pointer' in funcs.values())
--        self.assertEqual(len(calls['normal']), 2)
++        self.assertFalse('pointer' in (typ for typ, src in funcs.values()))
++        self.assertEqual(len(calls['normal']), 3)
      def test_calls(self):
@@ -68,7 +76,11 @@
          self.assertTrue(('normal', True) in calls['main'])
          self.assertTrue(('duplicates', True) in calls['main'])
--        normal_calls = sorted(['wierd$name', 'printf', 'func_ptr_3'])
++        self.assertTrue(('wierd$name', False) in calls['normal'])
++        self.assertTrue(('name', True) in calls['wierd$name'])
++        self.assertTrue(('puts', False) in calls['name'])
++
++        normal_calls = sorted(['wierd$name', 'printf', 'inl_func', 'func_ptr_3'])
          self.assertEquals(sorted(zip(*calls['normal'])[0]), normal_calls)
          self.assertEquals(calls['duplicates'].count(('normal', True)), 1)
@@ -77,6 +89,9 @@
          self.assertTrue(('func_ptr_4', True) in calls['duplicates'])
          self.assertTrue(('func_ptr_5', True) in calls['duplicates'])
++        # A function that should only be visible in the plt section
++        self.assertFalse('__gmon_start__' in funcs)
++
      def test_sections(self):
          res, funcs, calls = self.do_parse(sections=['.plt', '.text'])
@@ -84,6 +99,17 @@
          self.assertTrue('@' not in ''.join(funcs.keys()), "check names are extracted correctly")
          self.assertTrue('__gmon_start__' in funcs, "see a function defined only in .plt")
--
++    def test_clean_names(self):
++        clean_id = parser.Parser.clean_id
++        # Stripping of ios be prefixes
++        self.assertEquals(clean_id('__be_test__be_name'), 'test__be_name')
++        self.assertEquals(clean_id('_be_test_name'), '_be_test_name')
++        self.assertEquals(clean_id('__betest_name'), '__betest_name')
++
++        # Removing of extra bits at end
++        self.assertEquals(clean_id('test_name.'), 'test_name')
++        self.assertEquals(clean_id('test_name..0'), 'test_name')
++
++
  if __name__ == '__main__':
      unittest.main()
 === added file 'src/sextant/test_resources/parser_file2.c'
 --- src/sextant/test_resources/parser_file2.c	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_resources/parser_file2.c	2014-12-12 11:30:28 +0000
@@ -0,0 +1,24 @@
++#include <stdio.h>
++#include "parser_header.h"
++
++static int
++wierd$name(int a);
++
++static int
++__be_name(int a);
++
++static int
++wierd$name(int a)
++{
++    a = __be_name(inl_func(a));
++    return (a);
++}
++
++static int
++__be_name(int a)
++{
++    printf("In __be_name\n");
++    return (a+1);
++}
++
++
 === added file 'src/sextant/test_resources/parser_header.h'
 --- src/sextant/test_resources/parser_header.h	1970-01-01 00:00:00 +0000
 +++ src/sextant/test_resources/parser_header.h	2014-12-12 11:30:28 +0000
@@ -0,0 +1,10 @@
++#ifndef PARSER_H
++#define PARSER_H
++
++static inline int
++inl_func(int a)
++{
++    return (2*a);
++}
++
++#endif
 === modified file 'src/sextant/test_resources/parser_test'
 Binary files src/sextant/test_resources/parser_test	2014-11-17 13:53:27 +0000 and src/sextant/test_resources/parser_test	2014-12-12 11:30:28 +0000 differ
 === modified file 'src/sextant/test_resources/parser_test.c'
 --- src/sextant/test_resources/parser_test.c	2014-10-13 14:10:01 +0000
 +++ src/sextant/test_resources/parser_test.c	2014-12-12 11:30:28 +0000
@@ -1,11 +1,11 @@
  // COMMENT
  #include<stdio.h>
++#include "parser_header.h"
++#include "parser_file2.c"
  static int
  normal(int a);
--static int
--wierd$name(int a);
  typedef int (*pointer)(int);
@@ -18,19 +18,13 @@
      pointer ptr = wierd$name;
      wierd$name(a);
--    printf("%d\n", a);
++    printf("%d\n", inl_func(a));
      ptr(a);
      return (a);
+ }
  static int
--wierd$name(int a)
--{
--    return (a);
--}
--
--static int
  duplicates(int a)
+ {
      pointer ptr1 = wierd$name;
 === modified file 'src/sextant/test_sshmanager.py'
 --- src/sextant/test_sshmanager.py	2014-10-17 14:35:01 +0000
 +++ src/sextant/test_sshmanager.py	2014-12-12 11:30:28 +0000
@@ -16,7 +16,7 @@
              self.manager = None
      def get_manager(self, local_port=9643, remote_host='localhost',
--                    remote_port=9643, ssh_user=None):
++                    remote_port=9643, ssh_user=None, is_localhost=False):
          return sshmanager.SSHManager(local_port, remote_host, remote_port, ssh_user)
      def test_init(self):
@@ -34,6 +34,15 @@
          # check connecion failure
          self.assertRaises(sshmanager.SSHConnectionError, self.get_manager, remote_host='invalid host')
++    def test_localhost(self):
++        self.manager = self.get_manager(is_localhost=True)
++        self.assertTrue(os.path.isdir(self.manager._tmp_dir))
++        self.manager.close()
++        self.assertFalse(os.path.isdir(self.manager._tmp_dir))
++        self.manager = None
++
++
++
      def test_files(self):
          genuine_file = 'test_resources/parser_test.c'
          genuine_file2 = 'test_resources/parser_test'