From eff571a639a2d1e1c798d6d74bb9f29b7069e7da Mon Sep 17 00:00:00 2001
From: ddaugherty97
Date: Sat, 26 Mar 2016 15:26:47 -0400
Subject: [PATCH 1/2] Completed Toolbox Assignment

---
 learning_curve.py | 21 +++++++++++----------
 questions.txt     |  4 ++++
 questions.txt~    |  4 ++++
 3 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 questions.txt
 create mode 100644 questions.txt~

diff --git a/learning_curve.py b/learning_curve.py
index 2364f2c..ea968ee 100644
--- a/learning_curve.py
+++ b/learning_curve.py
@@ -7,20 +7,21 @@ from sklearn.linear_model import LogisticRegression
 
 data = load_digits()
-print data.DESCR
-num_trials = 10
+num_trials = 100
 train_percentages = range(5,95,5)
-test_accuracies = numpy.zeros(len(train_percentages))
+test_accuracies = []  # numpy.zeros(len(train_percentages))
 
-# train a model with training percentages between 5 and 90 (see train_percentages) and evaluate
-# the resultant accuracy.
-# You should repeat each training percentage num_trials times to smooth out variability
-# for consistency with the previous example use model = LogisticRegression(C=10**-10) for your learner
-
-# TODO: your code here
+for n in train_percentages:  # for each training percentage
+    average_test = 0
+    for i in range(0, num_trials):  # run each percentage num_trials times to smooth out variability
+        X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, train_size = n/100.0)  # train on n percent of the data
+        model = LogisticRegression(C=10**-10)
+        model.fit(X_train, y_train)
+        average_test += model.score(X_test, y_test)  # accumulate the test accuracy from each trial
+    test_accuracies.append(average_test/num_trials)  # append the average accuracy to test_accuracies
 
 fig = plt.figure()
 plt.plot(train_percentages, test_accuracies)
 plt.xlabel('Percentage of Data Used for Training')
 plt.ylabel('Accuracy on Test Set')
-plt.show()
+plt.show()
\ No newline at end of file
diff --git a/questions.txt b/questions.txt
new file mode 100644
index 0000000..c674e3e
--- /dev/null
+++ b/questions.txt
@@ -0,0 +1,4 @@
+1. The general trend of the curve is upwards.
+2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
+3. Around 1000 trials gave me a smooth curve.
+4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.
diff --git a/questions.txt~ b/questions.txt~
new file mode 100644
index 0000000..c674e3e
--- /dev/null
+++ b/questions.txt~
@@ -0,0 +1,4 @@
+1. The general trend of the curve is upwards.
+2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
+3. Around 1000 trials gave me a smooth curve.
+4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.

From 6be3f70179362c36fc9261025e5db520a5c824a5 Mon Sep 17 00:00:00 2001
From: ddaugherty97
Date: Sat, 26 Mar 2016 15:29:13 -0400
Subject: [PATCH 2/2] whoops

---
 questions.txt~ | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 questions.txt~

diff --git a/questions.txt~ b/questions.txt~
deleted file mode 100644
index c674e3e..0000000
--- a/questions.txt~
+++ /dev/null
@@ -1,4 +0,0 @@
-1. The general trend of the curve is upwards.
-2. Yes, the beginning of the graph tends to be noisier than the rest of the graph. I believe this occurs because the model is trained on only a small portion of the data, which causes a lot of variability in the accuracy of its predictions.
-3. Around 1000 trials gave me a smooth curve.
-4. Higher values of C gave me a smoother curve that typically made an increasing graph with negative concavity. Lower values of C made the graph much noisier.
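
As a supplementary illustration (not part of either patch): a minimal sketch of the C sweep discussed in answer 4, assuming the same scikit-learn digits data and LogisticRegression learner used in learning_curve.py. It uses the newer sklearn.model_selection import path and Python 3 syntax; the particular C values and trial count are illustrative choices, not values taken from the patch.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

digits = load_digits()
train_percentages = range(5, 95, 5)
num_trials = 10  # raise toward ~1000 for a smoother curve, as noted in answer 3

for C in (10**-10, 10**-5, 1.0):  # smaller C = stronger regularization
    accuracies = []
    for pct in train_percentages:
        scores = []
        for _ in range(num_trials):
            # hold out (100 - pct) percent of the data for testing
            X_train, X_test, y_train, y_test = train_test_split(
                digits.data, digits.target, train_size=pct / 100.0)
            model = LogisticRegression(C=C)
            model.fit(X_train, y_train)
            scores.append(model.score(X_test, y_test))
        accuracies.append(np.mean(scores))  # average accuracy over the trials
    plt.plot(list(train_percentages), accuracies, label=f'C={C:g}')

plt.xlabel('Percentage of Data Used for Training')
plt.ylabel('Accuracy on Test Set')
plt.legend()
plt.show()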