[IPython-dev] pydoc and introspective features

Laurent Gautier lgautier@gmail....
Wed Feb 28 21:20:58 CST 2007


Nice. Thanks.

We already have a recursive data structure for objects, and implemented
search on both the pydoc string and on the elements of an object:
http://pydoc-r.svn.sourceforge.net/viewvc/pydoc-r/branches/pydoc_dag/pydoc/search.py?view=markup

I am very pleased to see that this sort of design is suggested as well!
We do seem to have a similar approach in spirit (we have different classes
for different object types), with the extra twist that we are trying
to model both
objects sitting on the disk (in unloaded modules/python files) and
'live' objects
(that is, 'in memory') under a unified structure.

I will look into getting in all the search goodies you propose.

Thanks again,

Laurent





2007/3/1, Greg Novak <novak@ucolick.org>:
> "Fernando Perez" <fperez.net@gmail.com> wrote:
> > - search.  This is probably the biggest gripe everyone has with python
> > vs. commercial interactive systems (such as Matlab or Mathematica).
> > Tab-completion and 'foo?' work great, but if you don't even know where
> > to begin looking for something, you're stuck.    A builtin indexing
> > system that could be either exposed via a web browser or to a
> > command-line program (such as ipython) would be very welcome by a lot
> > of users.
>
> While we're on the subject, I humbly submit my slow-as-a-slug but
> fairly general code to recursively search python objects looking for
> things.
>
> It looks inside modules, dicts, tuples, lists, and instances looking
> for things based on name, value, or docstring.  It's also pretty easy
> to extend it either to look inside different objects or else have a
> different definition of a 'match.'
>
> It returns a list of strings that tell you how to get to the thing you
> want.  A typical call would be:
>
> aproposName('needle', compoundObject)
>
> returns:
> ['arg[foo].bar[3]']
> Ie: "There's something named 'needle' in the third element of the
> attribute named bar of the object with dict key foo in the object
> passed as the argument."
>
> I've posted this before--this version fixes major problems (ie, some
> things I thought worked didn't work in the previously posted version).
>
> I've also attached test code.
>
> This is probably more useful as food for thought than for anything
> practical.  On the other hand it solves a somewhat more general
> problem, being able to look inside live objects as opposed to searching
> only doc strings.
>
> Greg
>
> import unittest;
>
> import apropos as aproposModule
> from apropos import *
>
> class AproposTest(unittest.TestCase):
>     # Untested functions, but I think it's ok that way:
>     # _apropos  apropos
>
>     def testAproposName(self):
>         class Composite:
>             def __init__(self):
>                 self.a = 1
>                 self.foo = 'bar'
>                 self.b = 3
>         self.assertEqual(aproposName('foo', [1,'foo',2]),
>                          [])
>         self.assertEqual(aproposName('foo', (1,'foo',3)),
>                          [])
>         self.assertEqual(aproposName('foo', dict(a=1,foo='bar',b=3)),
>                          ['arg[foo]'])
>         self.assertEqual(aproposName('foo', Composite()),
>                          ['arg.foo'])
>
>         lst = aproposName('aproposName', aproposModule, exclude='_')
>         self.assertTrue('apropos.aproposName' in lst)
>         self.assertTrue('apropos.aproposNameRegexp' in lst)
>         self.assertFalse('apropos.__builtins__[_ip].user_ns[aproposName]'
>                          in lst)
>
>         self.assertEqual(aproposName('foo', Composite(), name='name'),
>                          ['name.foo'])
>
>     def testMaxDepth(self):
>         lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
>                           maxDepth=0)
>         self.assertFalse('arg][foo][foo]' in lst)
>         self.assertFalse('arg][foo]' in lst)
>
>         lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
>                           maxDepth=1)
>         self.assertFalse('arg[foo][foo]' in lst)
>         self.assertTrue('arg[foo]' in lst)
>
>         lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
>                           maxDepth=2)
>         self.assertTrue('arg[foo][foo]' in lst)
>         self.assertTrue('arg[foo]' in lst)
>
>         lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3))
>         self.assertTrue('arg[foo][foo]' in lst)
>         self.assertTrue('arg[foo]' in lst)
>
>     # FIXME -- Sometimes causes bus error?
>     def disable_testModuleSearch(self):
>         # Sequester the long-running test.
>         lst = aproposName('aproposName', aproposModule)
>         self.assertTrue('apropos.aproposName' in lst)
>         self.assertTrue('apropos.aproposNameRegexp' in lst)
>         self.assertTrue('apropos.__builtins__[_ip].user_ns[aproposName]'
>                         in lst)
>
>     def testSyntax(self):
>         """Functionality has been tested... just make sure that these
>         functions can be called"""
>         class Composite:
>             def __init__(self, str):
>                 self.__doc__ = str
>
>         self.assertEqual(aproposValue('foo', dict(a=1, bar='foo')),
>                          ['arg[bar]'])
>         self.assertEqual(aproposDoc('foo', Composite('foo')),
>                          ['arg'])
>         self.assertEqual(aproposNameRegexp ('^foo', dict(foo=1, barfoo=2)),
>                          ['arg[foo]'])
>         self.assertEqual(aproposValueRegexp ('^foo', dict(bar='foo',
>                                                           the='afoo')),
>                          ['arg[bar]'])
>         self.assertEqual(aproposDocRegexp ('^foo', Composite('foo')),
>                          ['arg'])
>         self.assertEqual(aproposDocRegexp ('^foo', Composite('theFoo')),
>                          [])
>
>     def testNullIntrospector(self):
>         i = NullIntrospector()
>         # I think this is how this is supposed to work
>         self.assertEqual(id(i), id(i.__iter__()))
>         self.assertRaises(StopIteration, i.next)
>
>         # make sure code doens't freak out
>         i = NullIntrospector(exclude='_')
>
>     def testListIntrospector(self):
>         i = ListIntrospector([1,2])
>         self.assertEqual(id(i), id(i.__iter__()))
>         self.assertEqual(i.next(), (1, None, '[0]'))
>         self.assertEqual(i.next(), (2, None, '[1]'))
>         self.assertRaises(StopIteration, i.next)
>
>         # make sure code doens't freak out
>         i = ListIntrospector([1,2], exclude='_')
>
>     def testInstanceIntrospector(self):
>         class Composite:
>             pass
>
>         c = Composite()
>         c.a = 1
>         c.b = 2
>
>         lst = [el for el in InstanceIntrospector(c)]
>         # depending on how I'm running the test, one or the other of
>         # these should be in the list
>         self.assertTrue(('test_apropos', '__module__', '.__module__') in lst
>                         or ('__builtin__', '__module__', '.__module__') in lst)
>         self.assertTrue((None, '__doc__', '.__doc__') in lst)
>         self.assertTrue((1, 'a', '.a') in lst)
>         self.assertTrue((2, 'b', '.b') in lst)
>         self.assertEqual(len(lst), 4)
>
>         lst = [el for el in InstanceIntrospector(c, exclude='_')]
>         self.assertFalse(() in lst)
>         self.assertFalse((None, None, '.__doc__') in lst)
>         self.assertEqual(len(lst), 2)
>
>     def testDictIntrospector(self):
>         lst = [el for el in DictIntrospector(dict(a=1,_b=2))]
>
>         self.assertEqual(len(lst), 2)
>         self.assertTrue((1, 'a', '[a]') in lst)
>         self.assertTrue((2, '_b', '[_b]') in lst)
>
>         lst = [el for el in DictIntrospector(dict(a=1,_b=2), exclude='_')]
>         self.assertEqual(len(lst), 1)
>         self.assertTrue((1, 'a', '[a]') in lst)
>         self.assertFalse((2, '_b', '[_b]') in lst)
>
>     def testSearchName(self):
>         self.assertTrue(searchName('needle', 'the needle', None))
>         self.assertTrue(searchName('needle', 'needle more', None))
>         self.assertTrue(searchName('needle', 'the needle more', None))
>
>         # Make sure function doesn't freak out for no name
>         self.assertFalse(searchName('needle', None, None))
>
>     def testSearchValue(self):
>         class Composite:
>             def __init__(self, str):
>                 self._str = str
>             def __repr__(self):
>                 return self._str
>             def __str__(self):
>                 return self._str
>
>         self.assertTrue(searchValue('needle', None,
>                                     Composite('the needle')))
>         self.assertTrue(searchValue('needle', None,
>                                     Composite('needle more')))
>         self.assertTrue(searchValue('needle', None,
>                                     Composite('the needle more')))
>         # These are not true because searchValue doens't split
>         # apart built-in containers
>         self.assertFalse(searchValue('needle', None,
>                                     ['needle', 2, 3]))
>         self.assertFalse(searchValue('needle', None,
>                                     ('needle', 2, 3)))
>         self.assertFalse(searchValue('needle', None,
>                                     dict(a='needle', b=2, c=3)))
>
>
>     def testSearchDoc(self):
>         class Composite:
>             def __init__(self, str):
>                 self.__doc__ = str
>
>         self.assertTrue(searchDoc('needle', None,
>                                   Composite('the needle')))
>         self.assertTrue(searchDoc('needle', None,
>                                   Composite('needle more')))
>         self.assertTrue(searchDoc('needle', None,
>                                   Composite('the needle more')))
>
>         # Make sure search fn doesn't freak out
>         self.assertFalse(searchDoc('needle', None,
>                                    Composite(None)))
>
>
>     def testSearchNameRegexp(self):
>         self.assertFalse(searchNameRegexp('^needle', 'the needle', None))
>         self.assertTrue(searchNameRegexp('^needle', 'needle more', None))
>         self.assertFalse(searchNameRegexp('^needle', 'the needle more', None))
>
>         # Make sure function doesn't freak out for no name
>         self.assertFalse(searchName('^needle', None, None))
>
>     def testSearchValueRegexp(self):
>         class Composite:
>             def __init__(self, str):
>                 self._str = str
>             def __repr__(self):
>                 return self._str
>             def __str__(self):
>                 return self._str
>
>         self.assertFalse(searchValueRegexp('^needle', None,
>                                            Composite('the needle')))
>         self.assertTrue(searchValueRegexp('^needle', None,
>                                           Composite('needle more')))
>         self.assertFalse(searchValueRegexp('^needle', None,
>                                            Composite('the needle more')))
>
>         # Make sure we don't search inside containers
>         self.assertFalse(searchValueRegexp('needle', None,
>                                            ['needle', 2, 3]))
>         self.assertFalse(searchValueRegexp('needle', None,
>                                            ('needle', 2, 3)))
>         self.assertFalse(searchValueRegexp('needle', None,
>                                            dict(a='needle', b=2, c=3)))
>
>     def testSearchDocRegexp(self):
>         class Composite:
>             def __init__(self, str):
>                 self.__doc__ = str
>
>         self.assertFalse(searchDocRegexp('^needle', None,
>                                          Composite('the needle')))
>         self.assertTrue(searchDocRegexp('^needle', None,
>                                         Composite('needle more')))
>         self.assertFalse(searchDocRegexp('^needle', None,
>                                          Composite('the needle more')))
>
>         # Make sure function doesn't freak out if no doc
>         self.assertFalse(searchDocRegexp('^needle', None,
>                                          Composite(None)))
>
> def suite():
>     suites = [unittest.TestLoader().loadTestsFromTestCase(test)
>               for test in (AproposTest,)]
>     return unittest.TestSuite(suites)
>
> def test():
>     unittest.TextTestRunner().run(suite())
>
> def itest():
>     suite().debug()
>
> import types
> import re
>
> __version__ = 0.2
> __author__ = "Greg Novak <novak@ucolick.org"
> # Date: January 14, 2007
> # Code released public domain.  Do whatever you want with it.
>
> # Extension point: you can add your own types to these lists if you
> # want apropos to descend into them.
> #
> # The stated intent is that a container which doesn't respond
> # appropriately to the methods listed below can instead be given a
> # method called __apropos__ that returns an iterator.  The iterator
> # should return the contents of the object, as tuples of
> # (elementObject, nameString, accessString).  introspect() calls it as
> # obj.__apropos__(**kw), i.e. with whatever keyword options (such as
> # exclude) were forwarded to it.
> # NOTE(review): _apropos() only descends into objects whose type is in
> # one of these lists, and introspect() checks the lists before looking
> # for __apropos__, so the hook looks unreachable from a search as
> # written -- confirm.
>
> # Must respond to __iter__ and [string].  Designed for things you
> # access via [string]
> dictTypes = [types.DictType]
> # Must respond to __iter__().  Designed for things you access via
> # [int]
> listTypes = [types.ListType, types.TupleType]
> # Must give sensible results to dir(), getattr().  Designed for things
> # you access via .
> instanceTypes = [types.InstanceType, types.ModuleType]
>
> ##################################################
> ## Interface
>
> ## Common Usage
> def aproposName(needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a
>     substring of the name.  See apropos() for addtional keyword
>     arguments.  Typical usage is aproposName('string', module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchName, **kw)
>
> def aproposValue(needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a
>     substring the string representation of the object.  See apropos()
>     for addtional keyword arguments.  Typical usage is
>     aproposValue('string', module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchValue, **kw)
>
> def aproposDoc(needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a
>     substring of the documentation string of the object.  See
>     apropos() for addtional keyword arguments.  Typical usage is
>     aproposDoc('string', module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchDoc, **kw)
>
> def aproposNameRegexp (needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a regexp
>     matching the name.  See apropos() for addtional keyword arguments.
>     Typical usage is aproposNameRegexp('string', module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchNameRegexp, **kw)
>
> def aproposValueRegexp(needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a regexp
>     matching the string representation of the object.  See apropos()
>     for addtional keyword arguments.  Typical usage is
>     aproposValueRegexp('string', module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchValueRegexp, **kw)
>
> def aproposDocRegexp(needle, haystack=None, **kw):
>     """Recursively search for attributes with where needle is a regexp
>     matching the docstring of the object.  See apropos() for addtional
>     keyword arguments.  Typical usage is aproposDocRegexp('string',
>     module).
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     return apropos(needle, haystack, searchFn=searchDocRegexp, **kw)
>
> ## Handles default values of arguments
> def apropos(needle, haystack=None, name=None,
>             searchFn=None, **kw):
>     """Recursively search through haystack looking for needle.
>     Typical usage is apropos('string', module).
>
>     haystack can be any python object.  Typically it's a module.  If
>     it's not given, it's the dict returned by globals() (ie, watch
>     out, it's going to take a while).
>
>     name is the name of the top level object.  It's first bit of the
>     'accessor' strings that are returned.  If not specified, defaults
>     to 'arg'.
>
>     Matches determined by searchFn.  searchFn(needle, name, obj)
>     returns true if the object should be considered a match.  By
>     default, searchFn matches if needle is a substring of the name of
>     the object.
>
>     Return a list of strings showing the path to reach the matching
>     object"""
>     if haystack is None:
>         haystack = globals()
>         name = ''
>     elif name is None:
>         if hasattr(haystack, "__name__"):
>             name = haystack.__name__
>         else:
>             name = 'arg'
>
>     if searchFn is None: searchFn = searchName
>
>     return _apropos(needle, haystack, name, searchFn, **kw)
>
> ##################################################
> ## Common search functions
>
> def searchName(needle, name, obj):
>     return name and needle in name
>
> def searchValue(needle, name, obj):
>     # String representation of dicts, lists, and tuples includes the
>     # objects within them, so don't consider that to be a match on the
>     # desired value.  Wait to get inside the container class...
>     #
>     # TODO What I really want to do is match the container if none of
>     # its contents matched.
>     if type(obj) not in (types.TupleType, types.ListType,
>                          types.DictType):
>         return needle in str(obj)
> # NOTE -- should be repr()?
>
> def searchDoc(needle, name, obj):
>     return hasattr(obj, '__doc__') and obj.__doc__ \
>            and needle in obj.__doc__
>
> def searchNameRegexp(needle, name, obj):
>     return name and re.search(needle, name)
>
> def searchValueRegexp(needle, name, obj):
>     if type(obj) not in (types.TupleType, types.ListType,
>                          types.DictType):
>         return re.search(needle, str(obj))
>
> def searchDocRegexp(needle, name, obj):
>     return hasattr(obj, '__doc__') \
>            and obj.__doc__ \
>            and re.search(needle, obj.__doc__)
>
> ##################################################
> ## The guts
>
> def _apropos(needle, haystack, haystackName,
>              searchFn, maxDepth=None, **kw):
>     """Recursively search through haystack looking for needle.
>
>     haystack can be any python object.  Typically it's a module.  If
>     it's not given, it's the dict returned by globals() (ie, watch
>     out, it's going to take a while).
>
>     Matches determined by searchFn.  searchFn(needle, name, obj)
>     returns true if the object should be considered a match.  By
>     default, searchFn matches if needle is a substring of the name of
>     the object.
>
>     name is the name of the top level object.  It's first bit of the
>     'accessor' strings that are returned.  If not specified, defaults
>     to 'arg'.
>
>     Return a list of strings showing the path to reach the matching
>     object."""
>     def search(haystack, haystackName, fullName, depth):
>         '''Free variable: needle, searchTypes'''
>         # print "Searched", len(searchedIds), "Searching", depth, fullName
>         if searchFn(needle, haystackName, haystack):
>             found.append(fullName)
>
>         # break apart if obj is not already searched
>         if type(haystack) in searchTypes \
>                 and (not maxDepth or depth < maxDepth) \
>                 and id(haystack) not in searchedIds:
>             # Prevent loops with circular references by setting this
>             # _before_ descending into sub-objects
>             searchedIds.append(id(haystack))
>
>             for hay, hayName, hayAccess in introspect(haystack, **kw):
>                 search(hay, hayName, fullName + hayAccess, depth+1)
>
>     searchedIds = []
>     found = []
>     searchTypes = dictTypes + listTypes + instanceTypes
>
>     search(haystack, haystackName, haystackName, 0)
>     return found
>
> def introspect(obj, **kw):
>     if type(obj) in dictTypes:
>         return DictIntrospector(obj, **kw)
>     if type(obj) in listTypes:
>         return ListIntrospector(obj, **kw)
>     if type(obj) in instanceTypes:
>         return InstanceIntrospector(obj, **kw)
>
>     # User objects
>     if hasattr(obj, '__apropos__'):
>         return obj.__apropos__(**kw)
>
>     # Stymied
>     print "apropos.py: Warning, don't know how to deal with " + str(obj)
>     return NullIntrospector()
>
> # NOTE These introspectors simplify the code, but they seem to take about five
> # times as long, very unfortunately.
> class Introspector (object):
>     def __iter__(self):
>         return self
>
>     def next(self):
>         pass
>
> class NullIntrospector (Introspector):
>     def __init__(self, **kw):
>         pass
>
>     def next(self):
>         raise StopIteration
>
> class DictIntrospector (Introspector):
>     # types that respond to __iter__, obj.[key] to get a value
>     def __init__(self, dict, exclude=None):
>         self.dict = dict
>         self.iter = self.dict.__iter__()
>         self.exclude = exclude
>
>     def next(self):
>         # return tuple of obj, name, accessName
>         k = self.iter.next()
>         # FIXME -- completely skip non-string key entries
>         while type(k) is not types.StringType \
>               or (self.exclude and k.startswith(self.exclude)):
>             k = self.iter.next()
>         return self.dict[k], k, '[' + k + ']'
>
> class ListIntrospector (Introspector):
>     # types that respond to __iter__
>     def __init__(self, list, exclude=None):
>         self.list = list
>         self.iter = self.list.__iter__()
>         self.i = 0
>
>     def next(self):
>         # return tuple of obj, name, accessName
>         self.i += 1
>         return self.iter.next(), None, '[' + str(self.i-1) + ']'
>
> class InstanceIntrospector (Introspector):
>     # classes that respond to dir and getattr
>     def __init__(self, inst, exclude=None):
>         self.inst = inst
>         self.iter = dir(self.inst).__iter__()
>         self.exclude = exclude
>
>     def next(self):
>         # return tuple of obj, name, accessName
>
>         # IPython structs allow non-string attributes.  Filter them
>         # out because they cause problems.  That is, you have to
>         # access them via obj[1], not getattr(obj, 1) or
>         # getattr(obj, '1')
>         # FIXME -- filter out non-string things that appear in dir()
>
>         name = self.iter.next()
>         while type(name) is not types.StringType \
>               or (self.exclude and name.startswith(self.exclude)):
>             name = self.iter.next()
>         return getattr(self.inst, name), name, "." + name
>
>
>


More information about the IPython-dev mailing list