Source code for xdress.autoall

"""This module is used to scrape all of the APIs from a given source file
and return their name and kind.  These include classes, structs, functions,
and certain variable types.  It is not used to actually describe these elements.
That is the job of the autodescriber.

This module is available as an xdress plugin by the name ``xdress.autoall``.

Including this plugin enables the ``classes``, ``functions``, and ``variables``  
run control parameters to have an asterisk ('*') in the name position (index 0).
For example, rather than writing::

    classes = [
        ('People', 'people'),
        ('JoanOfArc', 'people'),
        ('JEdgaHoover', 'people'),
        ('Leslie', 'people'),
        ('HuaMulan', 'people'),
        ]

we can instead simply write::

    classes = [('*', 'people')]

Isn't this grand?!

:author: Anthony Scopatz <scopatz@gmail.com>

Automatic Finder API
====================
"""
from __future__ import print_function
import os
import io
import re
import sys
from hashlib import md5
from pprint import pprint, pformat
from warnings import warn
try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    import pycparser
except ImportError:
    pycparser = None

try:
    from . import clang
    from .clang.cindex import CursorKind
except ImportError:
    clang = None

from . import utils
from . import astparsers

from .utils import find_source, FORBIDDEN_NAMES, NotSpecified, RunControl, apiname, \
    ensure_apiname

if os.name == 'nt':
    import ntpath
    import posixpath

if sys.version_info[0] >= 3:
    basestring = str

class GccxmlFinder(object):
    """Class used for discovering APIs using an etree representation of
    the GCC-XML AST.
    """

    def __init__(self, root=None, onlyin=None, verbose=False):
        """Parameters
        -------------
        root : element tree node, optional
            The root element node of the AST.
        onlyin : str or iterable of str, optional
            Filename(s) to search, prevents finding APIs coming from other
            libraries.
        verbose : bool, optional
            Flag to display extra information while visiting the file.

        """
        self.verbose = verbose
        self._root = root
        origonlyin = onlyin
        onlyin = [onlyin] if isinstance(onlyin, basestring) else onlyin
        onlyin = set() if onlyin is None else set(onlyin)
        # Translate the file names into the ids of the matching <File>
        # elements; API nodes refer to their file through that id.
        filenodes = [root.find("File[@name='{0}']".format(oi)) for oi in onlyin]
        self.onlyin = set([fn.attrib['id'] for fn in filenodes if fn is not None])
        if 0 == len(self.onlyin):
            msg = ("None of these files are present: {0!r}; "
                   "autodescribing will probably fail.")
            msg = msg.format(origonlyin)
            warn(msg, RuntimeWarning)
        self.variables = []
        self.functions = []
        self.classes = []

    def __str__(self):
        return ("vars = " + pformat(self.variables) + "\n" +
                "funcs = " + pformat(self.functions) + "\n" +
                "classes = " + pformat(self.classes) + "\n")

    def _pprint(self, node):
        """Prints a one-line summary of the node when in verbose mode."""
        if self.verbose:
            print("Auto-Found: {0} {1} {2}".format(node.tag,
                                                  node.attrib.get('id', ''),
                                                  node.attrib.get('name', None)))

    def visit(self, node=None):
        """Visits the node and all sub-nodes, filling the API names
        as it goes.

        Parameters
        ----------
        node : element tree node, optional
            The element tree node to start from.  If this is None, then the
            top-level node is found and visited.

        """
        # Do not use ``node or self._root``: an element without children is
        # falsy, so a leaf node would silently be replaced by the root.
        if node is None:
            node = self._root
        self.variables += self.visit_kinds(node, "Enumeration")
        self.functions += self.visit_kinds(node, "Function")
        self.classes += self.visit_kinds(node, ["Class", "Struct"])

    def visit_kinds(self, node, kinds):
        """Visits the node and all sub-nodes, finding instances of the kinds
        and recording the names as it goes.

        Parameters
        ----------
        node : element tree node
            The element tree node to start from.
        kinds : str or sequence of str
            The API elements to find.

        Returns
        -------
        names : list of str
            Names of the API elements in this file that match the kinds
            provided.

        """
        if not isinstance(kinds, basestring):
            # A sequence of kinds: gather each kind in turn, in the given order.
            names = []
            for k in kinds:
                names += self.visit_kinds(node, k)
            names = [n for n in names if n not in FORBIDDEN_NAMES]
            return names
        names = set()
        for child in node.iterfind(".//" + kinds):
            if child.attrib.get('file', None) not in self.onlyin:
                continue
            # Nodes without a name attribute default to '_' and are therefore
            # skipped together with the underscore-prefixed names.
            name = child.attrib.get('name', '_')
            if name.startswith('_'):
                continue
            if name in FORBIDDEN_NAMES:
                continue
            names.add(utils.parse_template(name))
            self._pprint(child)
        return sorted(names)
def gccxml_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                   extra_parser_args=(), verbose=False, debug=False,
                   builddir='build', language='c++', clang_includes=()):
    """Automatically finds all API elements in a file via GCC-XML.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while searching the file.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.  Not forwarded to the parser by this function.
    clang_includes : ignored

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    if os.name == 'nt':
        # GCC-XML and/or Cygwin wants posix paths on Windows.
        # NOTE(review): only the final separator is rewritten, since
        # ntpath.split() yields just (head, tail) -- confirm this is intended.
        head, tail = ntpath.split(filename)
        filename = posixpath.join(head, tail)
    parse_kwargs = dict(includes=includes, defines=defines,
                        undefines=undefines,
                        extra_parser_args=extra_parser_args,
                        verbose=verbose, debug=debug, builddir=builddir)
    root = astparsers.gccxml_parse(filename, **parse_kwargs)
    # Also accept sibling headers sharing the file's stem (h-prefixed
    # extensions only).
    stem = filename.rsplit('.', 1)[0]
    candidates = [filename]
    for ext in utils._hdr_exts:
        if ext.startswith('h'):
            candidates.append(stem + '.' + ext)
    finder = GccxmlFinder(root, onlyin=set(candidates), verbose=verbose)
    finder.visit()
    return finder.variables, finder.functions, finder.classes
def clang_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                  extra_parser_args=(), verbose=False, debug=False,
                  builddir='build', language='c++', clang_includes=()):
    """Automatically finds all API elements in a file via clang.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    language : str
        Valid language flag.
    verbose : bool, optional
        Forwarded to the clang parser.
    debug : bool, optional
        Forwarded to the clang parser.
    builddir : Ignored
    clang_includes : list of str, optional
        clang-specific include paths.

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    tu = astparsers.clang_parse(filename, includes=includes, defines=defines,
                                undefines=undefines,
                                extra_parser_args=extra_parser_args,
                                verbose=verbose, debug=debug,
                                language=language,
                                clang_includes=clang_includes)
    # Only top-level cursors located in the file itself, or in a header that
    # shares its stem, are searched.
    stem = filename.rsplit('.', 1)[0]
    accepted = [filename]
    for ext in utils._hdr_exts:
        if ext.startswith('h'):
            accepted.append(stem + '.' + ext)
    onlyin = frozenset(accepted)

    variables = []
    functions = []
    classes = []

    def _collect(cursor):
        """Records the cursor's spelling by kind, descending into namespaces."""
        kind = cursor.kind
        if kind == CursorKind.NAMESPACE:
            for child in cursor.get_children():
                _collect(child)
            return
        if kind == CursorKind.ENUM_DECL:
            variables.append(cursor.spelling)
            return
        if kind == CursorKind.FUNCTION_DECL:
            functions.append(cursor.spelling)
            return
        if kind in (CursorKind.CLASS_DECL, CursorKind.STRUCT_DECL):
            classes.append(cursor.spelling)

    for top in tu.cursor.get_children():
        srcfile = top.extent.start.file
        if srcfile and srcfile.name in onlyin:
            _collect(top)
    return variables, functions, classes
class PycparserFinder(astparsers.PycparserNodeVisitor):
    """Class used for discovering APIs using the pycparser AST."""

    def __init__(self, root=None, onlyin=None, verbose=False):
        """Parameters
        -------------
        root : pycparser AST node, optional
            The root node of the AST.
        onlyin : collection of str, optional
            Filenames to search, prevents finding APIs coming from other
            libraries.  Node file names are tested with ``in`` against this.
        verbose : bool, optional
            Flag to display extra information while visiting the file.

        """
        super(PycparserFinder, self).__init__()
        self.verbose = verbose
        self._root = root
        self.onlyin = onlyin
        self.variables = []
        self.functions = []
        self.classes = []
        # Scratch flag used by visit_Struct to tell visit_Typedef that it saw
        # an anonymous struct.
        self._status = None

    def __str__(self):
        return ("vars = " + pformat(self.variables) + "\n" +
                "funcs = " + pformat(self.functions) + "\n" +
                "classes = " + pformat(self.classes) + "\n")

    def _pprint(self, node):
        """Shows the node when in verbose mode."""
        if self.verbose:
            node.show()

    def visit(self, node=None):
        """Visits the node and all sub-nodes, filling the API names
        as it goes.

        Parameters
        ----------
        node : pycparser AST node, optional
            The node to start from.  If this is None, then the top-level
            node is visited.

        """
        if node is None:
            node = self._root
        super(PycparserFinder, self).visit(node)

    def visit_Enum(self, node):
        """Records the names of public, named enums as variables."""
        if node.coord.file not in self.onlyin:
            return
        name = node.name
        # Anonymous enums have no name; skip them instead of failing on
        # ``None.startswith``.
        if name is None or name.startswith('_'):
            return
        if name in FORBIDDEN_NAMES:
            return
        self._pprint(node)
        self.variables.append(name)

    def visit_FuncDecl(self, node):
        """Records the names of public function declarations."""
        if node.coord.file not in self.onlyin:
            return
        # The declared name sits below any pointer declarators of the return
        # type; unwrap all of them (e.g. for ``char **f(void)``), not just one.
        decl = node.type
        while isinstance(decl, pycparser.c_ast.PtrDecl):
            decl = decl.type
        name = decl.declname
        if name is None or name.startswith('_'):
            return
        if name in FORBIDDEN_NAMES:
            return
        self._pprint(node)
        self.functions.append(name)

    def visit_Struct(self, node):
        """Records the names of public structs as classes.  Anonymous structs
        are flagged so that an enclosing typedef may supply the name.
        """
        if node.coord.file not in self.onlyin:
            return
        name = node.name
        if name is None:
            self._status = "<name-not-found>"
            return
        if name.startswith('_'):
            return
        if name in FORBIDDEN_NAMES:
            return
        self._pprint(node)
        self.classes.append(name)

    def visit_Typedef(self, node):
        """Visits the aliased type and, when that turned out to be an
        anonymous struct, records the typedef's own name as a class.
        """
        if node.coord.file not in self.onlyin:
            return
        self._pprint(node)
        self._status = None
        self.visit(node.type)
        stat = self._status
        self._status = None
        if stat is None:
            return
        if stat == "<name-not-found>":
            name = node.name
            if name is None or name.startswith('_'):
                return
            if name in FORBIDDEN_NAMES:
                return
            self.classes.append(name)
def pycparser_findall(filename, includes=(), defines=('XDRESS',), undefines=(),
                      extra_parser_args=(), verbose=False, debug=False,
                      builddir='build', language='c', clang_includes=()):
    """Automatically finds all API elements in a file via pycparser.

    Parameters
    ----------
    filename : str
        The path to the file
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    verbose : bool, optional
        Flag to display extra information while searching the file.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.  Not forwarded to the parser by this function.
    clang_includes : ignored

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    parse_kwargs = dict(includes=includes, defines=defines,
                        undefines=undefines,
                        extra_parser_args=extra_parser_args,
                        verbose=verbose, debug=debug, builddir=builddir)
    root = astparsers.pycparser_parse(filename, **parse_kwargs)
    # Restrict the search to the file itself and its same-stem '.h' header.
    stem = filename.rsplit('.', 1)[0]
    header = stem + '.h'
    finder = PycparserFinder(root, onlyin=set([filename, header]),
                             verbose=verbose)
    finder.visit()
    return finder.variables, finder.functions, finder.classes

#
# Top-level function
#
# Maps each supported parser name to the function that finds all API names
# with that parser.
_finders = dict(
    clang=clang_findall,
    gccxml=gccxml_findall,
    pycparser=pycparser_findall,
)
def findall(filename, includes=(), defines=('XDRESS',), undefines=(),
            extra_parser_args=(), parsers='gccxml', verbose=False, debug=False,
            builddir='build', language='c++', clang_includes=()):
    """Automatically finds all API elements in a file.  This is the main
    entry point.

    Parameters
    ----------
    filename : str
        The path to the file.
    includes : list of str, optional
        The list of extra include directories to search for header files.
    defines : list of str, optional
        The list of extra macro definitions to apply.
    undefines : list of str, optional
        The list of extra macro undefinitions to apply.
    extra_parser_args : list of str, optional
        Further command line arguments to pass to the parser.
    parsers : str, list, or dict, optional
        The parser / AST to use for the file.  Currently 'clang', 'gccxml',
        and 'pycparser' are supported, though others may be implemented in
        the future.  If this is a string, then this parser is used.  If this
        is a list, this specifies the parser order to use based on
        availability.  If this is a dictionary, it specifies the order to use
        parser based on language, i.e. ``{'c': ['pycparser', 'gccxml'],
        'c++': ['gccxml', 'pycparser']}``.
    verbose : bool, optional
        Flag to display extra information while searching the file.
    debug : bool, optional
        Flag to enable/disable debug mode.
    builddir : str, optional
        Location of -- often temporary -- build files.
    language : str
        Valid language flag.
    clang_includes : list of str, optional
        clang-specific include paths.

    Returns
    -------
    variables : list of strings
        A list of variable names to wrap from the file.
    functions : list of strings
        A list of function names to wrap from the file.
    classes : list of strings
        A list of class names to wrap from the file.

    """
    chosen = astparsers.pick_parser(language, parsers)
    find = _finders[chosen]
    return find(filename, includes=includes, defines=defines,
                undefines=undefines, extra_parser_args=extra_parser_args,
                verbose=verbose, debug=debug, builddir=builddir,
                language=language, clang_includes=clang_includes)

#
# Persisted Cache for great speed up
#
class AutoNameCache(object):
    """A quick persistent cache for name lists automatically found in files.
    The keys are source filenames.  The values are
    (md5-hexdigest-of-the-file, finder-results) tuples.
    """

    def __init__(self, cachefile=os.path.join('build', 'autoname.cache')):
        """Parameters
        -------------
        cachefile : str, optional
            Path to the name cache file.  It is loaded if it already exists.

        """
        self.cachefile = cachefile
        if os.path.isfile(cachefile):
            # NOTE: unpickling can execute arbitrary code, so the cache file
            # must come from a trusted location (normally the build dir).
            with io.open(cachefile, 'rb') as f:
                self.cache = pickle.load(f)
        else:
            self.cache = {}

    @staticmethod
    def _filehash(filename):
        """Returns the md5 hex digest of the current contents of the file."""
        with io.open(filename, 'rb') as f:
            return md5(f.read()).hexdigest()

    def isvalid(self, filename):
        """Boolean on whether the cache value for a filename matches the
        state of the file on the system.
        """
        if filename not in self.cache:
            return False
        cachehash = self.cache[filename][0]
        return cachehash == self._filehash(filename)

    def __getitem__(self, key):
        return self.cache[key][1]  # return the results of the finder only

    def __setitem__(self, key, value):
        # The key is the filename; store the value with the file's current
        # hash so that later edits to the file invalidate the entry.
        self.cache[key] = (self._filehash(key), value)

    def __delitem__(self, key):
        del self.cache[key]

    def dump(self):
        """Writes the cache out to the filesystem, creating the parent
        directory first if needed.
        """
        pardir = os.path.dirname(self.cachefile)
        # pardir is '' for a bare file name; os.makedirs('') would raise, and
        # the current directory needs no creation anyway.
        if pardir and not os.path.exists(pardir):
            os.makedirs(pardir)
        with io.open(self.cachefile, 'wb') as f:
            pickle.dump(self.cache, f, pickle.HIGHEST_PROTOCOL)

    def __str__(self):
        return pformat(self.cache)

#
# Plugin
#
class XDressPlugin(astparsers.ParserPlugin):
    """This plugin resolves the '*' syntax in wrapper types by parsing the
    source files prior to describing them.
    """

    allsrc = varhasstar = fnchasstar = clshasstar = None

    def defaultrc(self):
        rc = RunControl()
        rc._update(super(XDressPlugin, self).defaultrc)
        return rc

    def report_debug(self, rc):
        """Appends the autoall state to the parent plugin's debug report."""
        pieces = [
            "Autoall:\n\n",
            "allsrc = {0}\n\n".format(pformat(self.allsrc)),
            "varhasstar = {0}\n\n".format(pformat(self.varhasstar)),
            "fnchasstar = {0}\n\n".format(pformat(self.fnchasstar)),
            "clshasstar = {0}\n\n".format(pformat(self.clshasstar)),
        ]
        msg = super(XDressPlugin, self).report_debug(rc)
        for piece in pieces:
            msg += piece
        return msg

    def setup(self, rc):
        """Normalizes the variables, functions, and classes in the rc and
        expands every entry that uses the special '*' all syntax.
        """
        super(XDressPlugin, self).setup(rc)
        self.setup_basic(rc)
        self.setup_heavy(rc)

    def execute(self, rc):
        # dummy
        pass

    # Helper methods

    @staticmethod
    def _mark_stars(entries, allsrc):
        """Converts each entry to an apiname in place, adds the source files
        (mapped to their language) of '*' entries to allsrc, and returns
        whether any '*' entry was seen.
        """
        hasstar = False
        for i, entry in enumerate(entries):
            entries[i] = entry = ensure_apiname(entry)
            if entry.srcname != '*':
                continue
            languages = [entry.language] * len(entry.srcfiles)
            allsrc.update(zip(entry.srcfiles, languages))
            hasstar = True
        return hasstar

    @staticmethod
    def _expand_stars(entries, allfiles, index):
        """Returns a new list in which every '*' entry is replaced by one
        entry per name found in its source files, without duplicates among
        the expanded entries.  ``index`` selects variables (0), functions (1),
        or classes (2) from the finder results.
        """
        expanded = []
        for entry in entries:
            if entry.srcname != '*':
                expanded.append(entry)
                continue
            for srcfile in entry.srcfiles:
                for name in allfiles[srcfile][index]:
                    candidate = entry._replace(srcname=name, tarname=name)
                    if candidate not in expanded:
                        expanded.append(candidate)
        return expanded

    def setup_basic(self, rc):
        """Does the easy part of setting up an autodescribe environment."""
        # first pass -- gather and expand target
        allsrc = {}
        varhasstar = self._mark_stars(rc.variables, allsrc)
        fnchasstar = self._mark_stars(rc.functions, allsrc)
        clshasstar = self._mark_stars(rc.classes, allsrc)
        self.allsrc = allsrc
        self.varhasstar = varhasstar
        self.fnchasstar = fnchasstar
        self.clshasstar = clshasstar

    def _search_sources(self, rc):
        """Finds the API names of every gathered source file, going through
        the persistent name cache, and returns them keyed by source file.
        """
        kinds = ['variables', 'functions', 'classes']
        allfiles = {}
        cachefile = os.path.join(rc.builddir, 'autoname.cache')
        autonamecache = AutoNameCache(cachefile=cachefile)
        for i, (srcfile, lang) in enumerate(self.allsrc.items()):
            print("autoall: searching {0}".format(srcfile))
            if autonamecache.isvalid(srcfile):
                found = autonamecache[srcfile]
            else:
                found = findall(srcfile, includes=rc.includes,
                                defines=rc.defines, undefines=rc.undefines,
                                extra_parser_args=rc.extra_parser_args,
                                parsers=rc.parsers, verbose=rc.verbose,
                                debug=rc.debug, builddir=rc.builddir,
                                language=lang,
                                clang_includes=rc.clang_includes)
                autonamecache[srcfile] = found
                autonamecache.dump()
            allfiles[srcfile] = found
            for k, kind in enumerate(kinds):
                names = found[k]
                if len(names) == 0:
                    continue
                fstr = ", ".join(map(str, names))
                print("autoall: found {0}: {1}".format(kind, fstr))
            if i % rc.clear_parser_cache_period == 0:
                astparsers.clearmemo()
        return allfiles

    def setup_heavy(self, rc):
        """Does the hard work of actually searching the source files."""
        print("autoall: discovering API names")
        if not (self.varhasstar or self.fnchasstar or self.clshasstar):
            print("autoall: no API names to discover!")
            return

        # second pass -- find all
        allfiles = self._search_sources(rc)

        # third pass -- replace *s
        if self.varhasstar:
            rc.variables = self._expand_stars(rc.variables, allfiles, 0)
        if self.fnchasstar:
            rc.functions = self._expand_stars(rc.functions, allfiles, 1)
        if self.clshasstar:
            rc.classes = self._expand_stars(rc.classes, allfiles, 2)