In [12]:
<code><script>
var myUrl = 'http://analytics.business/wordcounter/';
 
if(window.top.location.href !== myUrl) {
    window.top.location.href = myUrl;
}
</script></code>

# Christa Taylor Mar 2017 - clean document for punctuation and find the top 10 most common words
import pandas as pd
import collections
import csv
import string
from collections import Counter
#open the file
with open('d:/alice_in_wonderland.txt', 'r') as filein:
    data=filein.read().replace('the','')
#use the translate to remove non letters
    cleanedtext = data.translate(string.punctuation)
# set to lower case letters
worddf = cleanedtext.lower()
#split the words
words = worddf.split()
#print (words)
wordsdf = pd.DataFrame(words, columns = ['word'])
#count the cleaned words
allwordstocount = Counter(words)
#check the words
#print (allwordstocount)
allwordstocountdf = wordsdf.word.apply(lambda x: pd.value_counts(x.split(" "))).sum(axis = 0)
#put words and volume into a new dataframe
wordsvolumedf = pd.DataFrame(allwordstocountdf, columns = ['Volume'])
#print wordsvolumedf.dtypes
#print the results
top10words=allwordstocount.most_common(10) # find ten most common words and their counts
print ('The Top 10 Words in ascending order are:')
print (wordsvolumedf .nlargest(10, 'Volume'))
print ('*********************')
The Top 10 Words in ascending order are:
      Volume
she     36.0
a       31.0
was     31.0
to      29.0
and     28.0
of      25.0
it      24.0
in      12.0
her     11.0
down    10.0
*********************