Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Python: Create a Binary search Tree using a list

The objective of my code is to get each seperate word from a txt file and put it into a list and then making a binary search tree using that list to count the frequency of each word and printing each word in alphabetical order along with its frequency. Each word in the can only contain letters, numbers, -, or ' The part that I am unable to do with my beginner programming knowledge is to make the Binary Search Tree using the list I have (I am only able to insert the whole list in one Node instead of putting each word in a Node to make the tree). The code I have so far is this:

def read_words(filename):
    openfile = open(filename, "r")
    templist = []
    letterslist = []
    for lines in openfile:
        for i in lines:
            ii = i.lower()
            letterslist.append(ii)
    for p in letterslist:
        if p not in ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',"'","-",' '] and p.isdigit() == False:
            letterslist.remove(p)      
    wordslist = list("".join(letterslist).split())
    return wordslist

class BinaryTree:
    class _Node:
        def __init__(self, value, left=None, right=None):
            self._left = left
            self._right = right
            self._value = value
            self._count = 1


    def __init__(self):
        self.root = None

    def isEmpty(self):
        return self.root == None

    def insert(self, value) :
        if self.isEmpty() :
            self.root = self._Node(value)
            return
        parent = None
        pointer = self.root
        while (pointer != None) :
            if value == pointer._value:
                pointer._count += 1
                return

            elif value < pointer._value:
                parent = pointer
                pointer = pointer._left

            else :
                parent = pointer
                pointer = pointer._right            

        if (value <= parent._value) :
            parent._left = self._Node(value)
        else :
            parent._right = self._Node(value)    

    def printTree(self):
        pointer = self.root
        if pointer._left is not None:
            pointer._left.printTree()
        print(str(pointer._value) + " " + str(pointer._count))
        if pointer._right is not None:
            pointer._right.printTree()




    def createTree(self,words):
        if len(words) > 0:
            for word in words:
                BinaryTree().insert(word)
            return BinaryTree()
        else:
            return None

    def search(self,tree, word):
        node = tree
        depth = 0
        count = 0
        while True:
            print(node.value)
            depth += 1
            if node.value == word:
                count = node.count
                break
            elif word < node.value:
                node = node.left
            elif word > node.value:
                node = node.right
        return depth, count


def main():
    words = read_words('sample.txt')
    b = BinaryTree()
    b.insert(words)
    b.createTree(words)
    b.printTree()
like image 331
Mike Thavam Avatar asked Dec 19 '22 18:12

Mike Thavam


1 Answers

Since you're a beginner I'd advice to implement the tree methods with recursion instead of iteration since this will result to simpler implementation. While recursion might seem a bit difficult concept at first often it is the easiest approach.

Here's a draft implementation of a binary tree which uses recursion for insertion, searching and printing the tree, it should support the functionality you need.

class Node(object):
    def __init__(self, value):
        self.value = value
        self.left = None
        self.right = None
        self.count = 1

    def __str__(self):
        return 'value: {0}, count: {1}'.format(self.value, self.count)

def insert(root, value):
    if not root:
        return Node(value)
    elif root.value == value:
        root.count += 1
    elif value < root.value:
        root.left = insert(root.left, value)
    else:
        root.right = insert(root.right, value)

    return root

def create(seq):
    root = None
    for word in seq:
        root = insert(root, word)

    return root

def search(root, word, depth=1):
    if not root:
        return 0, 0
    elif root.value == word:
        return depth, root.count
    elif word < root.value:
        return search(root.left, word, depth + 1)
    else:
        return search(root.right, word, depth + 1)

def print_tree(root):
    if root:
        print_tree(root.left)
        print root
        print_tree(root.right)

src = ['foo', 'bar', 'foobar', 'bar', 'barfoo']
tree = create(src)
print_tree(tree)

for word in src:
    print 'search {0}, result: {1}'.format(word, search(tree, word))

# Output
# value: bar, count: 2
# value: barfoo, count: 1
# value: foo, count: 1
# value: foobar, count: 1
# search foo, result: (1, 1)
# search bar, result: (2, 2)
# search foobar, result: (2, 1)
# search bar, result: (2, 2)
# search barfoo, result: (3, 1)
like image 170
niemmi Avatar answered Dec 21 '22 09:12

niemmi