I want to index all methods and thee connections between them in an entire application (A directory with sub directories and files eventually). I'm using ast
, looping over directories till individual files and then loads them into an ast
object like so ast.parse(self.file_content)
The index that i'm trying to create is this connection
Here is my code, if it's relevant.
def scan(self):
'''
scans a file line by line while keeping context of location and classes
indexes a file buffer content into a ast, Abstract Syntax Trees, https://en.wikipedia.org/wiki/AST.
Then, iterate over the elements, find relevant ones to index, and index them into the db.
'''
parsed_result = ast.parse(self.file_content)
for element in parsed_result.body:
results = self.index_element(element)
def index_element(self, element, class_name=None):
'''
if element is relevant, meaning method -> index
if element is Class -> recursively call it self
:param element:
:param class_name:
:return: [{insert_result: <db_insert_result>, 'structured_data': <method> object}, ...]
'''
# find classes
# find methods inside classes
# find hanging functions
# validation on element type
if self.should_index_element(element):
if self.is_class_definition(element):
class_element = element
indexed_items = []
for inner_element in class_element.body:
# recursive call
results = self.index_element(inner_element, class_name=class_element.name)
indexed_items += results
return indexed_items
else:
structured_data = self.structure_method_into_an_object(element, class_name=class_name)
result_graph = self.dal_client.methods_graph.insert_or_update(structured_data)
return "WhatEver"
return "WhatEver"
My question is, is it possible to create this graph using ast
. If yes, how?
From my understanding, I currently can't since I'm loading one file at a time to the ast
object and it is not aware of outside methods.
here is an example for 2 files that I want to link between them:
sample_a.py
from sample_class_b import SampleClassB
sample_b = SampleClassB()
class SampleClassA(object):
def __init__(self):
self.a = 1
def test_call_to_another_function(self):
return sample_b.test()
sample_b.py
class SampleClassB(object):
def __init__(self):
self.b = 1
def test(self):
return True
You can traverse the ast.Ast
tree and at each recursive call do one of four things:
class
definition, store the class
name with its associated methods, and then apply Connections.walk
to each of the methods, storing the class
and method name in the scope.import
statement, load the module and recursively run Connections.walk
on it.Connections.walk
is within a method, check if the attribute name is a method of any class
es currently loaded. If so, add an edge to edges
that links the current scope with this new method discovered.import ast, itertools
import re, importlib
class Connections:
def __init__(self):
self._classes, self.edges = {}, []
def walk(self, tree, scope=None):
t_obj = None
if isinstance(tree, ast.ClassDef):
self._classes[tree.name] = [i for i in tree.body if isinstance(i, ast.FunctionDef) and not re.findall('__[a-z]+__', i.name)]
_ = [self.walk(i, [tree.name, i.name]) for i in self._classes[tree.name]]
t_obj = [i for i in tree.body if i not in self._classes[tree.name]]
elif isinstance(tree, (ast.Import, ast.ImportFrom)):
for p in [tree.module] if hasattr(tree, 'module') else [i.name for i in tree.names]:
with open(importlib.import_module(p).__file__) as f:
t_obj = ast.parse(f.read())
elif isinstance(tree, ast.Attribute) and scope is not None:
if (c:=[a for a, b in self._classes.items() if any(i.name == tree.attr for i in b)]):
self.edges.append((scope, [c[0], tree.attr]))
t_obj = tree.value
if isinstance(t_obj:=(tree if t_obj is None else t_obj), list):
for i in t_obj:
self.walk(i, scope = scope)
else:
for i in getattr(t_obj, '_fields', []):
self.walk(getattr(t_obj, i), scope=scope)
with open('sample_a.py') as f:
c = Connections()
c.walk(ast.parse(f.read()))
print(c.edges)
Output:
[(['SampleClassA', 'test_call_to_another_function'], ['SampleClassB', 'test'])]
Important note: depending on the complexity of the files you are running Connections.walk
on, a RecursionError
might occur. To circumvent this, here is a Gist that contains an iterative version of Connections.walk
.
Creating a graph from edges
:
import networkx as nx
import matplotlib.pyplot as plt
g, labels, c1 = nx.DiGraph(), {}, itertools.count(1)
for m1, m2 in c.edges:
if (p1:='.'.join(m1)) not in labels:
labels[p1] = next(c1)
if (p2:='.'.join(m2)) not in labels:
labels[p2] = next(c1)
g.add_node(labels[p1])
g.add_node(labels[p2])
g.add_edge(labels[p1], labels[p2])
nx.draw(g, pos, labels={b:a for a, b in labels.items()}, with_labels = True)
plt.show()
Output:
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With