diff --git a/DocTesting.txt b/DocTesting.txt new file mode 100644 index 0000000..109147d --- /dev/null +++ b/DocTesting.txt @@ -0,0 +1,2 @@ +This is used +for doc testing diff --git a/TextMiningWriteup.pdf b/TextMiningWriteup.pdf new file mode 100644 index 0000000..b31d729 Binary files /dev/null and b/TextMiningWriteup.pdf differ diff --git a/TheGivingTree.txt b/TheGivingTree.txt new file mode 100644 index 0000000..f176a95 --- /dev/null +++ b/TheGivingTree.txt @@ -0,0 +1,47 @@ +the giving tree by shel silverstein +once there was a tree and she loved a little boy +and every day the boy would come and he would gather her leaves and make them into crowns and play king of the forest +he would climb up her trunk and swing from her branches and eat apples +and they would play hid and go seek +and when he was tired he would sleep in her shade +and the boy loved the tree very much +and the tree was happy +but time went by and the boy grew older +and the tree was often alone +and then one day the boy came to the tree +and the tree said come boy come climb up my trunk and swing from my branches +and eat apples and play in my shade and be happy +i am too big to climb and play said the boy +i want to buy things and have fun +i want some money +can you give me some money +i am sorry said the tree but i have no money +i have only leaves and apples +take my applpes boy and sell them in the city +then you will have money and you will be happy +and so the boy climbed up the tree and gethered her apples and carried them away and the tree was happy +but the boy stayed away for a long time and the tree was sad +and then one day the boy came back and the tree shook with joy and she said come boy climb up my trunk and swing from my branches and be happy +i am too busy to climb trees said the boy i want a house to keep me warm i want a wife and i want children and so i need a house can you give me a house +i have no house said the tree the forest is my house but you may cut off my 
branches and build a house then you will be happy +and the boy cut off her branches and carried them away to build his house +and the tree was happy +but the boy stayed away for a long time +and when he came back the tree was so happy she could hardly speek +come boy she wispered come and play +i am too old and sad to play said the boy i want a boat that can take me far away from here can you give me a boat +cut down my trunk and make a boat said the tree Then you can sail away and be happy +and so the boy but down her trunk and made a boat and sailed away +and the tree was happy but not really +and after a long time the boy came back again +i am sorry boy said the tree but i have nothing left to give you my apples are gone +my teeth are too weak for apples said the boy +my branches are gone said the tree you cannot swing on them +i am too old to swing on branches said the boy +my trunk is gone said the tree you cannot climb +i am too tired to climb said the boy +i am sorry said the tree i wish that i could give you something but i have nothing left i am just an old stump +i do not need very much now said the boy just a quiet place to sit and rest i am very tired +well said the tree straightening herself up as much as she could well an old stump is good for sitting and resting come boy sit down and rest +and the boy did and the tree was happy +the end \ No newline at end of file diff --git a/TheTaleOfPeterRabbitClean.txt b/TheTaleOfPeterRabbitClean.txt new file mode 100644 index 0000000..f454212 --- /dev/null +++ b/TheTaleOfPeterRabbitClean.txt @@ -0,0 +1,195 @@ +once upon a time there were four little rabbits and their names + +were + +flopsy + +mopsy + +cottontail + +and peter + +they lived with their mother in a sandbank underneath the root of a + +very big firtree + +now my dears said old mrs rabbit one morning you may go into + +the fields or down the lane but dont go into mr mcgregors garden + +your father had an accident there he was put in a pie by mrs + 
+mcgregor + +now run along and dont get into mischief i am going out + +then old mrs rabbit took a basket and her umbrella and went through + +the wood to the bakers she bought a loaf of brown bread and five + +currant buns + +flopsy mopsy and cottontail who were good little bunnies went + +down the lane to gather blackberries + +but peter who was very naughty ran straight away to mr mcgregors + +garden and squeezed under the gate + +first he ate some lettuces and some french beans and then he ate + +some radishes + +and then feeling rather sick he went to look for some parsley + +but round the end of a cucumber frame whom should he meet but mr + +mcgregor + +mr mcgregor was on his hands and knees planting out young cabbages + +but he jumped up and ran after peter waving a rake and calling out + +stop thief + +peter was most dreadfully frightened he rushed all over the garden + +for he had forgotten the way back to the gate + +he lost one of his shoes among the cabbages and the other shoe + +amongst the potatoes + +after losing them he ran on four legs and went faster so that i + +think he might have got away altogether if he had not unfortunately + +run into a gooseberry net and got caught by the large buttons on his + +jacket it was a blue jacket with brass buttons quite new + +peter gave himself up for lost and shed big tears but his sobs were + +overheard by some friendly sparrows who flew to him in great + +excitement and implored him to exert himself + +mr mcgregor came up with a sieve which he intended to pop upon the + +top of peter but peter wriggled out just in time leaving his jacket + +behind him + +and rushed into the toolshed and jumped into a can it would have + +been a beautiful thing to hide in if it had not had so much water in it + +mr mcgregor was quite sure that peter was somewhere in the + +toolshed perhaps hidden underneath a flowerpot he began to turn + +them over carefully looking under each + +presently peter sneezedkertyschoo mr mcgregor 
was after him in + +no time + +and tried to put his foot upon peter who jumped out of a window + +upsetting three plants the window was too small for mr mcgregor and + +he was tired of running after peter he went back to his work + +peter sat down to rest he was out of breath and trembling with + +fright and he had not the least idea which way to go also he was + +very damp with sitting in that can + +after a time he began to wander about going lippitylippitynot + +very fast and looking all round + +he found a door in a wall but it was locked and there was no room + +for a fat little rabbit to squeeze underneath + +an old mouse was running in and out over the stone doorstep carrying + +peas and beans to her family in the wood peter asked her the way to + +the gate but she had such a large pea in her mouth that she could not + +answer she only shook her head at him peter began to cry + +then he tried to find his way straight across the garden but he + +became more and more puzzled presently he came to a pond where mr + +mcgregor filled his watercans a white cat was staring at some + +goldfish she sat very very still but now and then the tip of her + +tail twitched as if it were alive peter thought it best to go away + +without speaking to her he had heard about cats from his cousin + +little benjamin bunny + +he went back towards the toolshed but suddenly quite close to him + +he heard the noise of a hoescrrritch scratch scratch scritch + +peter scuttered underneath the bushes but presently as nothing + +happened he came out and climbed upon a wheelbarrow and peeped over + +the first thing he saw was mr mcgregor hoeing onions his back was + +turned towards peter and beyond him was the gate + +peter got down very quietly off the wheelbarrow and started running + +as fast as he could go along a straight walk behind some + +blackcurrant bushes + +mr mcgregor caught sight of him at the corner but peter did not + +care he slipped underneath the gate and was safe at last 
in the wood + +outside the garden + +mr mcgregor hung up the little jacket and the shoes for a scarecrow + +to frighten the blackbirds + +peter never stopped running or looked behind him till he got home to + +the big firtree + +he was so tired that he flopped down upon the nice soft sand on the + +floor of the rabbithole and shut his eyes his mother was busy + +cooking she wondered what he had done with his clothes it was the + +second little jacket and pair of shoes that peter had lost in a + +fortnight + +i am sorry to say that peter was not very well during the evening + +his mother put him to bed and made some camomile tea and she gave a + +dose of it to peter + +one tablespoonful to be taken at bedtime + +but flopsy mopsy and cottontail had bread and milk and + +blackberries for supper + +the end \ No newline at end of file diff --git a/TheVeryHungryCatipillar.txt b/TheVeryHungryCatipillar.txt new file mode 100644 index 0000000..21f3683 --- /dev/null +++ b/TheVeryHungryCatipillar.txt @@ -0,0 +1,22 @@ +the very hungry caterpillar by eric carle +in the light of the moon a little egg lay on a leaf +one sunday morning the warm sun came up and pop out of the egg came a tiny very hungry caterpillar +he started to look for some food +on monday he ate through one apple +but he was still hungry +on tuesday he ate through two pears +but he was still hungry +on wednesday he ate through three plums +but he was still hungry +on thursday he ate through four strawberries but he was still hungry +on friday he ate through five oranges but he was still hungry +on saturday he ate one peice of chocolate cake one ice cream cone one pickle one slice of swiss cheese one slice of salami one lollipop one peice of cherry pie one sausage one cupcake and one slice of watermellon +that night he had a stomach ache +the next day was sunday again +the caterpillar ate through one nice green leaf and after that he felt much better +now he was not hungry any more and he was not a little 
caterpillar any more
+he was a big fat caterpillar
+he buit a small house called a cocoon around himself
+he stayed inside for more than two weeks
+then he nibbled a hole in the cocoon pushed his way out and he was a beautiful butterfly
+the end
\ No newline at end of file
diff --git a/text_mining.py b/text_mining.py
new file mode 100644
index 0000000..fec7bda
--- /dev/null
+++ b/text_mining.py
@@ -0,0 +1,97 @@
+"""
+This compares word frequency, tone, word length, and the amount of unique words.
+
+
+@author: Lauren Pudvan
+
+"""
+import string
+from pattern.web import *
+"""
+TheTaleOfPeterRabbitURL = URL('http://www.gutenberg.org/cache/epub/14838/pg14838.txt').download()
+TheTaleOfPeterRabbit = plaintext(TheTaleOfPeterRabbitURL)
+
+f_TheTaleOfPeterRabbit = open('TheTaleOfPeterRabbitDownload.txt', 'w')
+f_TheTaleOfPeterRabbit.write(TheTaleOfPeterRabbit.encode('UTF-8'))
+f_TheTaleOfPeterRabbit.close
+
+TheTaleOfPeterRabbitClean = open('TheTaleOfPeterRabbitDownload.txt', 'r').read()
+exclude = set(string.punctuation)
+TheTaleOfPeterRabbitClean = ''.join(ch for ch in TheTaleOfPeterRabbitClean if ch not in exclude)
+TheTaleOfPeterRabbitClean = TheTaleOfPeterRabbitClean.lower()
+
+FinalTheTaleOfPeterRabbit = open('TheTaleOfPeterRabbitClean.txt', 'w')
+FinalTheTaleOfPeterRabbit.write(TheTaleOfPeterRabbitClean)
+FinalTheTaleOfPeterRabbit.close
+"""
+#That was an example of how I got one of the books downloaded.
+#Because Gutenberg was down I got the other two by copy and pasting them into a plain text file.
+
+
+from pattern.en import *
+import operator
+
+def word_frequency(book):
+    """ Count how often each whitespace-separated word occurs in a text file.
+
+    book: path of the text file to read.
+    Returns a list of (word, count) tuples sorted from lowest to highest count
+    (words that occur most are at the end). Words with the same count are in
+    alphabetical order so the result does not depend on dictionary ordering.
+    >>> word_frequency('DocTesting.txt')
+    [('This', 1), ('doc', 1), ('for', 1), ('is', 1), ('testing', 1), ('used', 1)]
+    """
+    wordcount = {} # maps each word to the number of times it has been seen
+    with open(book, 'r') as f: # the with block closes the file even if reading fails
+        for word in f.read().split():
+            wordcount[word] = wordcount.get(word, 0) + 1
+    # itemgetter(1, 0) sorts on (count, word); the word only breaks ties between equal counts
+    return sorted(wordcount.items(), key=operator.itemgetter(1, 0))
+
+def amount_of_independent_words(book): # The amount of unique words not counting repetition.
+    """ Return the number of distinct words in the text file at the path book.
+
+    word_frequency returns one entry per distinct word, so the length of its result is the answer.
+    >>> amount_of_independent_words('DocTesting.txt')
+    6
+    """
+    return len(word_frequency(book))
+
+def average_word_length(book):
+    """ Return the average length in characters of the whitespace-separated words in a text file.
+
+    book: path of the text file to read.
+    Returns a float, or 0.0 when the file contains no words.
+    >>> average_word_length('DocTesting.txt')
+    3.8333333333333335
+    """
+    with open(book, 'r') as f:
+        word_lengths = [len(word) for word in f.read().split()]
+    if not word_lengths: # an empty file would otherwise divide by zero
+        return 0.0
+    return sum(word_lengths) / float(len(word_lengths))
+
+def tone(book):
+    """ Return the sentiment polarity of the text in a file.
+
+    book: path of the text file to read.
+    Returns the first element of the result of sentiment() applied to the whole text.
+    There is no doctest because the exact sentiment value is hard to predict by hand.
+    """
+    with open(book, 'r') as b:
+        text = b.read()
+    # sentiment() must receive the text that was read, not a file object or type
+    return sentiment(text)[0]
+
+
+# The doctests can be run with: python -m doctest text_mining.py
+
+if __name__ == "__main__":
+    # Only print the report when run as a script, so importing the module has no side effects.
+    for book in ('TheVeryHungryCatipillar.txt',
+                 'TheGivingTree.txt',
+                 'TheTaleOfPeterRabbitClean.txt'):
+        print(word_frequency(book))
+        print(amount_of_independent_words(book))
+        print(average_word_length(book))
+        print(tone(book))
\ No newline at end of file