As of right now, I have a function to replace the countChars function,
def countWords(lines):
wordDict = {}
for line in lines:
wordList = lines.split()
for word in wordList:
if word in wordDict: wordDict[word] += 1
else: wordDict[word] = 1
return wordDict
but when I run the program it spits out this abomination (this is just an example, there's about two pages of words with a huge number count next to it)
before 1478
battle-field 1478
as 1478
any 1478
altogether 1478
all 1478
ago 1478
advanced. 1478
add 1478
above 1478
While obviously this means that the code is sound enough to run, I'm not getting what I want out of it. It needs to print how many times each word is in the file (gb.txt, which is the Gettysburg address) Obviously each word that is in the file isn't in there exactly 1478 times..
I'm pretty new at programming, so I'm kind of stumped..
from __future__ import division
inputFileName = 'gb.txt'
def readfile(fname):
f = open(fname, 'r')
s = f.read()
f.close()
return s.lower()
def countChars(t):
charDict = {}
for char in t:
if char in charDict: charDict[char] += 1
else: charDict[char] = 1
return charDict
def findMostCommon(charDict):
mostFreq = ''
mostFreqCount = 0
for k in charDict:
if charDict[k] > mostFreqCount:
mostFreqCount = charDict[k]
mostFreq = k
return mostFreq
def printCounts(charDict):
for k in charDict:
#First, handle some chars that don't show up very well when they print
if k == '\n': print '\\n', charDict[k] #newline
elif k == ' ': print 'space', charDict[k]
elif k == '\t': print '\\t', charDict[k] #tab
else: print k, charDict[k] #Normal character - print it with its count
def printAlphabetically(charDict):
keyList = charDict.keys()
keyList.sort()
for k in keyList:
#First, handle some chars that don't show up very well when they print
if k == '\n': print '\\n', charDict[k] #newline
elif k == ' ': print 'space', charDict[k]
elif k == '\t': print '\\t', charDict[k] #tab
else: print k, charDict[k] #Normal character - print it with its count
def printByFreq(charDict):
aList = []
for k in charDict:
aList.append([charDict[k], k])
aList.sort() #Sort into ascending order
aList.reverse() #Put in descending order
for item in aList:
#First, handle some chars that don't show up very well when they print
if item[1] == '\n': print '\\n', item[0] #newline
elif item[1] == ' ': print 'space', item[0]
elif item[1] == '\t': print '\\t', item[0] #tab
else: print item[1], item[0] #Normal character - print it with its count
def main():
text = readfile(inputFileName)
charCounts = countChars(text)
mostCommon = findMostCommon(charCounts)
#print mostCommon + ':', charCounts[mostCommon]
#printCounts(charCounts)
#printAlphabetically(charCounts)
printByFreq(charCounts)
main()
If you need to count a number of words in a passage, then it is better to use regex.
Let's start with a simple example:
import re
my_string = "Wow! Is this true? Really!?!? This is crazy!"
words = re.findall(r'\w+', my_string) #This finds words in the document
Result:
>>> words
['Wow', 'Is', 'this', 'true', 'Really', 'This', 'is', 'crazy']
Note that "Is" and "is" are two different words. My guess is that you want the to count them the same, so we can just capitalize all the words, and then count them.
from collections import Counter
cap_words = [word.upper() for word in words] #capitalizes all the words
word_counts = Counter(cap_words) #counts the number each time a word appears
Result:
>>> word_counts
Counter({'THIS': 2, 'IS': 2, 'CRAZY': 1, 'WOW': 1, 'TRUE': 1, 'REALLY': 1})
Are you good up to here?
Now we need to do exactly the same thing we did above just this time we are reading a file.
import re
from collections import Counter
with open('your_file.txt') as f:
passage = f.read()
words = re.findall(r'\w+', passage)
cap_words = [word.upper() for word in words]
word_counts = Counter(cap_words)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With