168 lines
6.7 KiB
Python
168 lines
6.7 KiB
Python
"""A bottom-up tree matching algorithm implementation meant to speed
|
|
up 2to3's matching process. After the tree patterns are reduced to
|
|
their rarest linear path, a linear Aho-Corasick automaton is
|
|
created. The linear automaton traverses the linear paths from the
|
|
leaves to the root of the AST and returns a set of nodes for further
|
|
matching. This reduces significantly the number of candidate nodes."""
|
|
|
|
__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
|
|
|
|
import logging
|
|
import itertools
|
|
from collections import defaultdict
|
|
|
|
from . import pytree
|
|
from .btm_utils import reduce_tree
|
|
|
|
class BMNode(object):
|
|
"""Class for a node of the Aho-Corasick automaton used in matching"""
|
|
count = itertools.count()
|
|
def __init__(self):
|
|
self.transition_table = {}
|
|
self.fixers = []
|
|
self.id = next(BMNode.count)
|
|
self.content = ''
|
|
|
|
class BottomMatcher(object):
|
|
"""The main matcher class. After instantiating the patterns should
|
|
be added using the add_fixer method"""
|
|
|
|
def __init__(self):
|
|
self.match = set()
|
|
self.root = BMNode()
|
|
self.nodes = [self.root]
|
|
self.fixers = []
|
|
self.logger = logging.getLogger("RefactoringTool")
|
|
|
|
def add_fixer(self, fixer):
|
|
"""Reduces a fixer's pattern tree to a linear path and adds it
|
|
to the matcher(a common Aho-Corasick automaton). The fixer is
|
|
appended on the matching states and called when they are
|
|
reached"""
|
|
self.fixers.append(fixer)
|
|
tree = reduce_tree(fixer.pattern_tree)
|
|
linear = tree.get_linear_subpattern()
|
|
match_nodes = self.add(linear, start=self.root)
|
|
for match_node in match_nodes:
|
|
match_node.fixers.append(fixer)
|
|
|
|
def add(self, pattern, start):
|
|
"Recursively adds a linear pattern to the AC automaton"
|
|
#print("adding pattern", pattern, "to", start)
|
|
if not pattern:
|
|
#print("empty pattern")
|
|
return [start]
|
|
if isinstance(pattern[0], tuple):
|
|
#alternatives
|
|
#print("alternatives")
|
|
match_nodes = []
|
|
for alternative in pattern[0]:
|
|
#add all alternatives, and add the rest of the pattern
|
|
#to each end node
|
|
end_nodes = self.add(alternative, start=start)
|
|
for end in end_nodes:
|
|
match_nodes.extend(self.add(pattern[1:], end))
|
|
return match_nodes
|
|
else:
|
|
#single token
|
|
#not last
|
|
if pattern[0] not in start.transition_table:
|
|
#transition did not exist, create new
|
|
next_node = BMNode()
|
|
start.transition_table[pattern[0]] = next_node
|
|
else:
|
|
#transition exists already, follow
|
|
next_node = start.transition_table[pattern[0]]
|
|
|
|
if pattern[1:]:
|
|
end_nodes = self.add(pattern[1:], start=next_node)
|
|
else:
|
|
end_nodes = [next_node]
|
|
return end_nodes
|
|
|
|
def run(self, leaves):
|
|
"""The main interface with the bottom matcher. The tree is
|
|
traversed from the bottom using the constructed
|
|
automaton. Nodes are only checked once as the tree is
|
|
retraversed. When the automaton fails, we give it one more
|
|
shot(in case the above tree matches as a whole with the
|
|
rejected leaf), then we break for the next leaf. There is the
|
|
special case of multiple arguments(see code comments) where we
|
|
recheck the nodes
|
|
|
|
Args:
|
|
The leaves of the AST tree to be matched
|
|
|
|
Returns:
|
|
A dictionary of node matches with fixers as the keys
|
|
"""
|
|
current_ac_node = self.root
|
|
results = defaultdict(list)
|
|
for leaf in leaves:
|
|
current_ast_node = leaf
|
|
while current_ast_node:
|
|
current_ast_node.was_checked = True
|
|
for child in current_ast_node.children:
|
|
# multiple statements, recheck
|
|
if isinstance(child, pytree.Leaf) and child.value == ";":
|
|
current_ast_node.was_checked = False
|
|
break
|
|
if current_ast_node.type == 1:
|
|
#name
|
|
node_token = current_ast_node.value
|
|
else:
|
|
node_token = current_ast_node.type
|
|
|
|
if node_token in current_ac_node.transition_table:
|
|
#token matches
|
|
current_ac_node = current_ac_node.transition_table[node_token]
|
|
for fixer in current_ac_node.fixers:
|
|
if not fixer in results:
|
|
results[fixer] = []
|
|
results[fixer].append(current_ast_node)
|
|
|
|
else:
|
|
#matching failed, reset automaton
|
|
current_ac_node = self.root
|
|
if (current_ast_node.parent is not None
|
|
and current_ast_node.parent.was_checked):
|
|
#the rest of the tree upwards has been checked, next leaf
|
|
break
|
|
|
|
#recheck the rejected node once from the root
|
|
if node_token in current_ac_node.transition_table:
|
|
#token matches
|
|
current_ac_node = current_ac_node.transition_table[node_token]
|
|
for fixer in current_ac_node.fixers:
|
|
if not fixer in results.keys():
|
|
results[fixer] = []
|
|
results[fixer].append(current_ast_node)
|
|
|
|
current_ast_node = current_ast_node.parent
|
|
return results
|
|
|
|
def print_ac(self):
|
|
"Prints a graphviz diagram of the BM automaton(for debugging)"
|
|
print("digraph g{")
|
|
def print_node(node):
|
|
for subnode_key in node.transition_table.keys():
|
|
subnode = node.transition_table[subnode_key]
|
|
print("%d -> %d [label=%s] //%s" %
|
|
(node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
|
|
if subnode_key == 1:
|
|
print(subnode.content)
|
|
print_node(subnode)
|
|
print_node(self.root)
|
|
print("}")
|
|
|
|
# taken from pytree.py for debugging; only used by print_ac
|
|
_type_reprs = {}
|
|
def type_repr(type_num):
|
|
global _type_reprs
|
|
if not _type_reprs:
|
|
from .pygram import python_symbols
|
|
# printing tokens is possible but not as useful
|
|
# from .pgen2 import token // token.__dict__.items():
|
|
for name, val in python_symbols.__dict__.items():
|
|
if type(val) == int: _type_reprs[val] = name
|
|
return _type_reprs.setdefault(type_num, type_num)
|