From eff571a639a2d1e1c798d6d74bb9f29b7069e7da Mon Sep 17 00:00:00 2001
From: ddaugherty97
Date: Sat, 26 Mar 2016 15:26:47 -0400
Subject: [PATCH 1/2] Completed Toolbox Assignment

---
 learning_curve.py | 21 +++++++++++----------
 questions.txt     |  4 ++++
 questions.txt~    |  4 ++++
 3 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 questions.txt
 create mode 100644 questions.txt~

diff --git a/learning_curve.py b/learning_curve.py
index 2364f2c..ea968ee 100644
--- a/learning_curve.py
+++ b/learning_curve.py
@@ -7,20 +7,21 @@ from sklearn.linear_model import LogisticRegression
 
 data = load_digits()
-print data.DESCR
-num_trials = 10
+num_trials = 100
 train_percentages = range(5,95,5)
-test_accuracies = numpy.zeros(len(train_percentages))
+test_accuracies = []  # numpy.zeros(len(train_percentages))
 
-# train a model with training percentages between 5 and 90 (see train_percentages) and evaluate
-# the resultant accuracy.
-# You should repeat each training percentage num_trials times to smooth out variability
-# for consistency with the previous example use model = LogisticRegression(C=10**-10) for your learner
-
-# TODO: your code here
+for n in train_percentages:  # for each training percentage
+    average_test = 0
+    for i in range(0, num_trials):  # run each percentage num_trials times to smooth out variability
+        X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, train_size = n/100.0)  # train on n percent of the data
+        model = LogisticRegression(C=10**-10)
+        model.fit(X_train, y_train)
+        average_test += model.score(X_test, y_test)  # accumulate the test accuracy from each trial
+    test_accuracies.append(average_test/num_trials)  # append the average accuracy to test_accuracies
 
 fig = plt.figure()
 plt.plot(train_percentages, test_accuracies)
 plt.xlabel('Percentage of Data Used for Training')
 plt.ylabel('Accuracy on Test Set')
-plt.show()
+plt.show()
\ No newline at end of file
diff --git a/questions.txt b/questions.txt
new file mode 100644
index 0000000..c674e3e
--- /dev/null
+++ b/questions.txt
@@ -0,0 +1,4 @@
+1. The general trend of the curve is upwards.
+2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
+3. Around 1000 trials gave me a smooth curve.
+4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.
diff --git a/questions.txt~ b/questions.txt~
new file mode 100644
index 0000000..c674e3e
--- /dev/null
+++ b/questions.txt~
@@ -0,0 +1,4 @@
+1. The general trend of the curve is upwards.
+2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
+3. Around 1000 trials gave me a smooth curve.
+4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.

From 6be3f70179362c36fc9261025e5db520a5c824a5 Mon Sep 17 00:00:00 2001
From: ddaugherty97
Date: Sat, 26 Mar 2016 15:29:13 -0400
Subject: [PATCH 2/2] whoops

---
 questions.txt~ | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 questions.txt~

diff --git a/questions.txt~ b/questions.txt~
deleted file mode 100644
index c674e3e..0000000
--- a/questions.txt~
+++ /dev/null
@@ -1,4 +0,0 @@
-1. The general trend of the curve is upwards.
-2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
-3. Around 1000 trials gave me a smooth curve.
-4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.
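
As a supplementary illustration (not part of either patch): a minimal sketch of the C sweep discussed in answer 4, assuming the same scikit-learn digits data and LogisticRegression learner used in learning_curve.py. It uses the newer sklearn.model_selection import path and Python 3 syntax; the particular C values and trial count are illustrative choices, not values taken from the patch.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

digits = load_digits()
train_percentages = range(5, 95, 5)
num_trials = 10  # raise toward ~1000 for a smoother curve, as noted in answer 3

for C in (10**-10, 10**-5, 1.0):  # smaller C = stronger regularization
    accuracies = []
    for pct in train_percentages:
        scores = []
        for _ in range(num_trials):
            # hold out (100 - pct) percent of the data for testing
            X_train, X_test, y_train, y_test = train_test_split(
                digits.data, digits.target, train_size=pct / 100.0)
            model = LogisticRegression(C=C)
            model.fit(X_train, y_train)
            scores.append(model.score(X_test, y_test))
        accuracies.append(np.mean(scores))  # average accuracy over the trials
    plt.plot(list(train_percentages), accuracies, label=f'C={C:g}')

plt.xlabel('Percentage of Data Used for Training')
plt.ylabel('Accuracy on Test Set')
plt.legend()
plt.show()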