Given a Python script with print() statements, I'd like to be able to run through the script and insert a comment after each statement that shows its output. To demonstrate, take this script named example.py:
a, b = 1, 2
print('a + b:', a + b)
c, d = 3, 4
print('c + d:', c + d)
The desired output would be:
a, b = 1, 2
print('a + b:', a + b)
# a + b: 3
c, d = 3, 4
print('c + d:', c + d)
# c + d: 7
Here's my attempt, which works for simple examples like the one above:
import sys
from io import StringIO
def intercept_stdout(func):
    """Decorate *func* so that calling it returns the text it printed.

    The wrapper temporarily replaces ``sys.stdout`` with an in-memory buffer,
    calls *func* with the given arguments, and returns everything written to
    the buffer as a string.  The return value of *func* itself is discarded.
    Exceptions raised by *func* propagate to the caller.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        """Call the wrapped function and return its captured stdout."""
        original_stdout = sys.stdout
        capture_stdout = StringIO()
        sys.stdout = capture_stdout
        try:
            func(*args, **kwargs)
        finally:
            # Always put the real stdout back, even if func raised; otherwise
            # every later print in the process would vanish into the buffer.
            sys.stdout = original_stdout
        return capture_stdout.getvalue()
    return wrapper
@intercept_stdout
def exec_target(name):
    """Execute the script at path *name* as if it were run as ``__main__``.

    Because of the ``intercept_stdout`` decorator, calling this returns the
    text the script wrote to stdout rather than ``None``.
    """
    with open(name, 'r') as f:
        source = f.read()
    # Compiling with the real file name makes tracebacks point at the script.
    code = compile(source, name, 'exec')
    # Run in one explicit namespace.  A bare exec() inside a function uses
    # separate globals and locals, so functions defined by the script could
    # not see the script's own top-level names (or call each other).
    # NOTE: this executes arbitrary code; only use it on scripts you trust.
    exec(code, {'__name__': '__main__'})
def read_target(name):
    """Read the script at path *name* and return its source as a list of lines.

    Every returned line ends with a newline: if the file's last line lacks
    one, it is added so that a comment appended after it starts on a line of
    its own.  An empty file yields an empty list.
    """
    with open(name) as f:
        source = f.readlines()
    # Only the final line can be missing its newline; guard against an empty
    # file, where there is no final line to inspect.
    if source and not source[-1].endswith('\n'):
        source[-1] += '\n'
    return source
def annotate_source(target):
    """Return the source of script *target* with print output added as comments.

    The script is read, then executed once; its stdout is split into lines
    and the lines are paired, in order, with the source lines that start with
    ``print(``.  Each such source line is followed by ``# <output line>``.

    Limitations: only print calls that begin at column 0 are recognised, and
    each one is assumed to produce exactly one line of output.  If the script
    produces fewer output lines than it has print lines, the remaining print
    lines are left without an annotation.
    """
    target_source = read_target(target)
    target_output = exec_target(target)

    printed_lines = target_output.split('\n')
    # Output that ends with a newline splits into a trailing '' that is not a
    # real printed line; drop it so it is never used as an annotation.
    if printed_lines and printed_lines[-1] == '':
        printed_lines.pop()
    remaining_output = iter(printed_lines)

    annotated_source = []
    for line in target_source:
        annotated_source.append(line)
        if line.startswith('print('):
            # next() with a default avoids failing when the output runs out
            # before the print statements do (e.g. print(..., end='')).
            printed = next(remaining_output, None)
            if printed is not None:
                annotated_source.append('# ' + printed + '\n')
    return ''.join(annotated_source)
if __name__ == '__main__':
    target_script = 'example.py'
    # Build the annotated text before opening the output file: opening with
    # 'w' truncates immediately, so a failure while running the target would
    # otherwise leave an empty annotated_example.py behind.
    annotated = annotate_source(target_script)
    with open('annotated_example.py', 'w') as f:
        f.write(annotated)
However, it fails for scripts with print() statements that span multiple lines, as well as for print() statements that aren't at the start of a line. In a best-case scenario, it would even work for print() statements inside a function. Take the following example:
print('''print to multiple lines, first line
second line
third line''')
print('print from partial line, first part') if True else 0
1 if False else print('print from partial line, second part')
print('print from compound statement, first part'); pass
pass; print('print from compound statement, second part')
def foo():
print('bar')
foo()
Ideally, the output would look like this:
print('''print to multiple lines, first line
second line
third line''')
# print to multiple lines, first line
# second line
# third line
print('print from partial line, first part') if True else 0
# print from partial line, first part
1 if False else print('print from partial line, second part')
# print from partial line, second part
print('print from compound statement, first part'); pass
# print from compound statement, first part
pass; print('print from compound statement, second part')
# print from compound statement, second part
def foo():
print('bar')
foo()
# bar
But the script above mangles it like so:
print('''print to multiple lines, first line
# print to multiple lines, first line
second line
third line''')
print('print from partial line, first part') if True else 0
# second line
1 if False else print('print from partial line, second part')
print('print from compound statement, first part'); pass
# third line
pass; print('print from compound statement, second part')
def foo():
print('bar')
foo()
What approach would make this process more robust?
Have you considered using the inspect module? If you are willing to accept that the annotations always go next to the top-most call, and the file you are annotating is simple enough, you can get reasonable results. The following is my attempt, which overrides the built-in print function and looks at a stack trace to determine where print was called:
import inspect
import sys
from io import StringIO
# Captured print output, keyed by file name and then by line number:
# {filename: {line_number: [output, ...]}}
file_changes = {}


def anno_print(old_print, *args, **kwargs):
    """Stand-in for ``print`` that records the output instead of showing it.

    The text that ``old_print(*args, **kwargs)`` writes to ``sys.stdout`` is
    captured and appended to ``file_changes`` under the file name and line
    number of the second-outermost frame on the call stack.

    old_print -- the real print function to delegate to.
    args, kwargs -- forwarded to old_print unchanged.

    Returns None.  Exceptions raised by old_print propagate to the caller.
    """
    # getouterframes lists frames from this one outwards, so [-1] is the
    # outermost frame and [-2] the one just inside it.  When the target is
    # exec'd from the annotator's module level, [-2] is the target's own
    # top-level statement.  context=0 skips loading source lines, which are
    # not needed here.
    caller = inspect.getouterframes(inspect.currentframe(), 0)[-2]
    filename = caller.filename
    line_number = caller.lineno
    outputs = file_changes.setdefault(filename, {}).setdefault(line_number, [])

    orig_stdout = sys.stdout
    capture_stdout = StringIO()
    sys.stdout = capture_stdout
    try:
        old_print(*args, **kwargs)
    finally:
        # Restore stdout even if printing failed, so later output is not
        # silently swallowed by the buffer.
        sys.stdout = orig_stdout
    outputs.append(capture_stdout.getvalue())
def make_annotated_file(old_source, new_source, changes=None):
    """Write a copy of *old_source* to *new_source* with output comments added.

    old_source -- path of the script that was executed.
    new_source -- path of the annotated copy to write.
    changes -- optional mapping {line_number: [output, ...]} with 1-based
        line numbers.  When omitted, the entry recorded for *old_source* in
        the module-level ``file_changes`` is used; a script that recorded
        nothing is copied without annotations.

    After each source line, every output recorded for that line is written
    as ``#``-prefixed comment lines, one per line of output.
    """
    if changes is None:
        changes = file_changes.get(old_source, {})

    # Read everything before opening the destination: opening with 'w'
    # truncates, which would destroy the input if both paths are the same.
    with open(old_source) as src:
        content = src.readlines()

    with open(new_source, 'w') as dst:
        for line_num, line in enumerate(content, start=1):
            dst.write(line)
            if not line.endswith('\n'):
                # Last line of a file without a trailing newline.
                dst.write('\n')
            for output in changes.get(line_num, ()):
                # Drop only a real trailing newline; print(..., end='')
                # output has none and must not lose its last character.
                if output.endswith('\n'):
                    output = output[:-1]
                dst.write('#' + output.replace('\n', '\n#') + '\n')
if __name__=='__main__':
    # `__builtins__` is an implementation detail that may be either a module
    # or a dict; the `builtins` module is the supported way to patch print.
    import builtins

    target_source = "foo.py"
    old_print = builtins.print
    builtins.print = lambda *args, **kwargs: anno_print(old_print, *args, **kwargs)
    try:
        with open(target_source) as f:
            # Compiling with the target's file name is what lets anno_print
            # attribute each call to a line of that file.
            code = compile(f.read(), target_source, 'exec')
        # exec stays at module level on purpose: anno_print takes the
        # second-outermost stack frame, which is then the target's top level.
        # NOTE: this executes arbitrary code; only run trusted scripts.
        exec(code)
    finally:
        # Restore the real print even if the target script raised.
        builtins.print = old_print
    make_annotated_file(target_source, "foo_annotated.py")
If I run it on the following file "foo.py":
def foo():
print("a")
print("b")
def cool():
foo()
print("c")
def doesnt_print():
a = 2 + 3
print(1+2)
foo()
doesnt_print()
cool()
The output is "foo_annotated.py":
def foo():
print("a")
print("b")
def cool():
foo()
print("c")
def doesnt_print():
a = 2 + 3
print(1+2)
#3
foo()
#a
#b
doesnt_print()
cool()
#a
#b
#c
You can make it a lot easier by using an existing Python parser to extract the top-level statements from your code, for example the ast module in the standard library. However, ast loses some information, such as comments.
Libraries designed for source code transformations (which is what you are doing) might be better suited here; redbaron is a nice example.
To carry globals over to the next exec() call, you have to pass the second parameter (see the documentation):
# One dict is passed as exec()'s second argument for every statement, so the
# names bound by one statement are still there when the next one runs.
environment = {}
# `statements` is not defined in this snippet; it stands for the pieces of
# source extracted from the script being annotated.
for statement in statements:
    exec(statement, environment)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With