This is my first question, and I apologize if its a bit long on the code-example side.
As part of a job application I was asked to write a Bit Torrent file parser that exposed some of the fields. I did the code, and was told my code was "not quite at the level that we require from a team lead". Ouch!
That's fine its, been years since I have coded, and list comprehensions, generators did not exist back in the day (I started with COBOL, but have coded with C, C++, etc). To me the below code is very clean. Sometimes there is no need to use more complex structures, syntax or patterns - "Keep it Simple".
Could I ask some Python guru's to critique this code please? I'm believe it is useful to others to see where the code could be improved. There were more comments, etc (the bencode.py is from http://wiki.theory.org/Decoding_bencoded_data_with_python )
The areas I can think of:
I was personally proud of this code, so would like to know where I need to improve. Thanks.
#!/usr/bin/env python2
"""Bit Torrent Parsing
Parses a Bit Torrent file.
A basic parser for Bit Torrent files. Visit http://wiki.theory.org/BitTorrentSpecification for the BitTorrent specification.
"""
__author__ = "...."
__version__ = "$Revision: 1.0 $"
__date__ = "$Date: 2012/10/26 11:08:46 $"
__copyright__ = "Enjoy & Distribute"
__license__ = "Python"
import bencode
import argparse
from argparse import RawTextHelpFormatter
import binascii
import time
import os
import pprint
torrent_files = 0
torrent_pieces = 0
def display_root(filename, root):
"""prints main (root) information on torrent"""
global torrent_files
global torrent_pieces
print
print "Bit Torrent Metafile Structure root nodes:"
print "------------------------------------------"
print "Torrent filename: ", filename
print " Info: %d file(s), %d pieces, ~%d kb/pc" % (
torrent_files,
torrent_pieces,
root['info']['piece length'] / 1024)
if 'private' in root['info']:
if root['info']['private'] == 1:
print " Publish presence: Private"
print " Announce: ", root['announce']
if 'announce-list' in root:
print " Announce List: "
for i in root['announce-list']:
print " ", i[0]
if 'creation date' in root:
print " Creation Date: ", time.ctime(root['creation date'])
if 'comment' in root:
print " Comment: ", root['comment']
if 'created-by' in root:
print " Created-By: ", root['created-by']
print " Encoding: ", root['encoding']
print
def display_torrent_file(info):
"""prints file information (single or multifile)"""
global torrent_files
global torrent_pieces
if 'files' in info:
# multipart file mode
# directory, followed by filenames
print "Files:"
max_files = args.maxfiles
display = max_files if (max_files < torrent_files) else torrent_files
print " %d File %d shown: " % (torrent_files, display)
print " Directory: ", info['name']
print " Filenames:"
i = 0
for files in info['files']:
if i < max_files:
prefix = ''
if len(files['path']) > 1:
prefix = './'
filename = prefix + '/'.join(files['path'])
if args.filehash:
if 'md5sum' in files:
md5hash = binascii.hexlify(files['md5sum'])
else:
md5hash = 'n/a'
print ' %s [hash: %s]' % (filename, md5hash)
else:
print ' %s ' % filename
i += 1
else:
break
else:
# single file mode
print "Filename: ", info['name']
print
def display_pieces(pieceDict):
"""prints SHA1 hash for pieces, limited by arg pieces"""
global torrent_files
global torrent_pieces
# global pieceDict
# limit since a torrent file can have 1,000's of pieces
max_pieces = args.pieces if args.pieces else 10
print "Pieces:"
print " Torrent contains %s pieces, %d shown."% (
torrent_pieces, max_pieces)
print " piece : sha1"
i = 0
while i < max_pieces and i < torrent_pieces:
# print SHA1 hash in readable hex format
print ' %5d : %s' % (i+1, binascii.hexlify(pieceDict[i]))
i += 1
def parse_pieces(root):
"""create dictionary [ piece-num, hash ] from info's pieces
Returns the pieces dictionary. key is the piece number, value is the
SHA1 hash value (20-bytes)
Keyword arguments:
root -- a Bit Torrent Metafile root dictionary
"""
global torrent_pieces
pieceDict = {}
i = 0
while i < torrent_pieces:
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
i += 1
return pieceDict
def parse_root_str(root_str):
"""create dictionary [ piece-num, hash ] from info's pieces
Returns the complete Bit Torrent Metafile Structure dictionary with
relevant Bit Torrent Metafile nodes and their values.
Keyword arguments:
root_str -- a UTF-8 encoded string with root-level nodes (e.g., info)
"""
global torrent_files
global torrent_pieces
try:
torrent_root = bencode.decode(root_str)
except StandardError:
print 'Error in torrent file, likely missing separators like ":"'
if 'files' in torrent_root['info']:
torrent_files = len(torrent_root['info']['files'])
else:
torrent_files = 1
torrent_pieces = len(torrent_root['info']['pieces']) / 20
torrent_piece = parse_pieces(torrent_root)
return torrent_root, torrent_piece
def readfile(filename):
"""read file and return file's data"""
global torrent_files
global torrent_pieces
if os.path.exists(filename):
with open(filename, mode='rb') as f:
filedata = f.read()
else:
print "Error: filename: '%s' does not exist." % filename
raise IOError("Filename not found.")
return filedata
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,
description=
"A basic parser for Bit Torrent files. Visit "
"http://wiki.theory.org/BitTorrentSpecification for "
"the BitTorrent specification.",
epilog=
"The keys for the Bit Torrent MetaInfo File Structure "
"are info, announce, announce-list, creation date, comment, "
"created by and encoding. \n"
"The Info Dictionary (info) is dependant on whether the torrent "
"file is a single or multiple file. The keys common to both "
"are piece length, pieces and private.\nFor single files, the "
"additional keys are name, length and md5sum.For multiple files "
"the keys are, name and files. files is also a dictionary with "
"keys length, md5sum and path.\n\n"
"Examples:\n"
"torrentparse.py --string 'l4:dir14:dir28:file.exte'\n"
"torrentparse.py --filename foo.torrent\n"
"torrentparse.py -f foo.torrent -f bar.torrent "
"--maxfiles 2 --filehash --pieces 2 -v")
filegroup = parser.add_argument_group('Input File or String')
filegroup.add_argument("-f", "--filename",
help="name of torrent file to parse",
action='append')
filegroup.add_argument("-fh", "--filehash",
help="display file's MD5 hash",
action = "store_true")
filegroup.add_argument("-maxf", "--maxfiles",
help="display X filenames (default=20)",
metavar = 'X',
type=int, default=20)
piecegroup = parser.add_argument_group('Torrent Pieces')
piecegroup.add_argument("-p", "--pieces",
help = "display X piece's SHA1 hash (default=10)",
metavar = 'X',
type = int)
parser.add_argument("-s", "--string",
help="string for bencoded dictionary item")
parser.add_argument("-v", "--verbose",
help = "Display MetaInfo file to stdout",
action = "store_true")
args = parser.parse_args()
if args.string:
print
text = bencode.decode(args.string)
print text
else:
for fn in args.filename:
try:
filedata = readfile(fn)
torrent_root, torrent_piece = parse_root_str(filedata)
except IOError:
print "Please enter a valid filename"
raise
if torrent_root:
display_root(fn, torrent_root)
display_torrent_file(torrent_root['info'])
if args.pieces:
display_pieces(torrent_piece)
verbose = True if args.verbose else False
if verbose:
print
print "Verbose Mode: \nPrinting root and info dictionaries"
# remove pieces as its long. display it afterwards
pieceless_root = torrent_root
del pieceless_root['info']['pieces']
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(pieceless_root)
print
print "Print info's piece information: "
pp.pprint(torrent_piece)
print
print "\n"
The following snippet:
i = 0
while i < torrent_pieces:
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
i += 1
should be replaced by:
for i in range(torrent_pieces):
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
That might be the kind of thing they want to see. In general, Python code shouldn't need explicit index variable manipulation in for
loops very much.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With