From d0560e1eea769aca8eee4e30aeca6536b82809ba Mon Sep 17 00:00:00 2001 From: hmowen Date: Sun, 27 Mar 2016 22:51:57 -0400 Subject: [PATCH] Submission for Machine Learning Toolbox --- learning_curve.py | 12 ++++++++++-- questions.txt | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 questions.txt diff --git a/learning_curve.py b/learning_curve.py index 2364f2c..58e00bf 100644 --- a/learning_curve.py +++ b/learning_curve.py @@ -8,7 +8,7 @@ data = load_digits() print data.DESCR -num_trials = 10 +num_trials = 2500 train_percentages = range(5,95,5) test_accuracies = numpy.zeros(len(train_percentages)) @@ -17,7 +17,15 @@ # You should repeat each training percentage num_trials times to smooth out variability # for consistency with the previous example use model = LogisticRegression(C=10**-10) for your learner -# TODO: your code here +for i in range(len(train_percentages)): + trial_accuracies = numpy.zeros(num_trials) + for trial in range(num_trials): + X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, train_size=train_percentages[i]) + model = LogisticRegression(C=10**-10) + model.fit(X_train, y_train) + trial_accuracies[trial] = model.score(X_test, y_test) + test_accuracies[i] = sum(trial_accuracies) / num_trials + fig = plt.figure() plt.plot(train_percentages, test_accuracies) diff --git a/questions.txt b/questions.txt new file mode 100644 index 0000000..819731e --- /dev/null +++ b/questions.txt @@ -0,0 +1,5 @@ + 1. The general trend of the curve is a positive correlation: test accuracy increases as the percentage of data used for training increases. 2. There is more noise towards the two ends of the curve, probably because the train/test split is heavily imbalanced at either extreme (very little training data at one end, very little test data at the other). 3. The graph becomes fairly smooth at around 3000 trials. 4. As C increases, the graph becomes more of a curve than a line, appearing to approach an asymptote. \ No newline at end of file