From 8ba73edad0de934d8dfd791d9beafd147a59e972 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 29 Sep 2024 06:45:29 +0100 Subject: [PATCH] [CT4101]: Add Topic 3 Examples --- .../CT4101_03_CodeSamples+Datasets.zip | Bin .../examples/Decision_trees_impurity.ipynb | 402 +++++++++++ .../{ => examples}/HousePrices-1NN.xlsx | Bin .../materials/topic3/examples/beer_test.csv | 31 + .../topic3/examples/beer_training.csv | 125 ++++ .../topic3/examples/college_athletes.csv | 22 + .../materials/topic3/examples/iris_test.csv | 46 ++ .../topic3/examples/iris_training.csv | 106 +++ .../examples/k-NN_hyperparameters.ipynb | 653 ++++++++++++++++++ .../materials/topic3/examples/weather.csv | 15 + 10 files changed, 1400 insertions(+) rename year4/semester1/CT4101: Machine Learning/materials/topic3/{ => examples}/CT4101_03_CodeSamples+Datasets.zip (100%) create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/Decision_trees_impurity.ipynb rename year4/semester1/CT4101: Machine Learning/materials/topic3/{ => examples}/HousePrices-1NN.xlsx (100%) create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_test.csv create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_training.csv create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/college_athletes.csv create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_test.csv create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_training.csv create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/k-NN_hyperparameters.ipynb create mode 100644 year4/semester1/CT4101: Machine Learning/materials/topic3/examples/weather.csv diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/CT4101_03_CodeSamples+Datasets.zip b/year4/semester1/CT4101: Machine 
Learning/materials/topic3/examples/CT4101_03_CodeSamples+Datasets.zip similarity index 100% rename from year4/semester1/CT4101: Machine Learning/materials/topic3/CT4101_03_CodeSamples+Datasets.zip rename to year4/semester1/CT4101: Machine Learning/materials/topic3/examples/CT4101_03_CodeSamples+Datasets.zip diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/Decision_trees_impurity.ipynb b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/Decision_trees_impurity.ipynb new file mode 100644 index 00000000..4fe2d08b --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/Decision_trees_impurity.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Decision tree learning & Impurity (entropy/Gini) examples\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SciPy Entropy Examples\n", + "Below are a few simple examples of calculating entropy using the built in function from scipy.stats" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# entropy of a fair coin, i.e. 50% chance of heads, 50% chance of tails\n", + "from scipy.stats import entropy\n", + "entropy([1/2, 1/2], base=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9709505944546688" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# entropy of a biased coin, e.g. 
60% chance of heads, 40% chance of tails\n", + "entropy([60/100, 40/100], base=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# entropy of a coin with both sides the same, e.g. 100% chance of heads\n", + "entropy([1], base=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Scikit-learn decision tree examples" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity style alcohol sugars bitterness \\\n", + "0 45.305310 0.459548 1.917273 ale 4.227692 16.67 12.568947 \n", + "1 43.889381 0.548977 3.186364 ale 4.289231 16.73 14.974000 \n", + "2 41.588496 0.542847 1.568182 ale 4.344615 16.48 11.848789 \n", + "3 44.553097 0.480301 1.871818 ale 4.424615 18.59 13.879632 \n", + "4 41.013274 0.441860 2.345455 ale 4.264615 16.35 12.186053 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 167 11.04 62.178571 \n", + "1 128 13.44 63.032857 \n", + "2 88 14.04 63.468571 \n", + "3 147 12.48 63.531429 \n", + "4 74 12.12 63.747143 \n", + "(124, 10)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# details for beer dataset\n", + "training_file = \"beer_training.csv\"\n", + "test_file = \"beer_test.csv\"\n", + "independent_cols = [\"calorific_value\", \"nitrogen\", \"turbidity\", \"alcohol\", \"sugars\", \"bitterness\", \"beer_id\", \n", + " \"colour\", \"degree_of_fermentation\"]\n", + "dependent_col = \"style\"\n", + "\n", + "# Here we load our training dataset in from the training file using the pandas library\n", + "df_training = pd.read_csv(training_file)\n", + "print(df_training.head())\n", + "print(df_training.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, 
+ "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity alcohol sugars bitterness \\\n", + "0 45.305310 0.459548 1.917273 4.227692 16.67 12.568947 \n", + "1 43.889381 0.548977 3.186364 4.289231 16.73 14.974000 \n", + "2 41.588496 0.542847 1.568182 4.344615 16.48 11.848789 \n", + "3 44.553097 0.480301 1.871818 4.424615 18.59 13.879632 \n", + "4 41.013274 0.441860 2.345455 4.264615 16.35 12.186053 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 167 11.04 62.178571 \n", + "1 128 13.44 63.032857 \n", + "2 88 14.04 63.468571 \n", + "3 147 12.48 63.531429 \n", + "4 74 12.12 63.747143 \n", + "(124, 9)\n" + ] + } + ], + "source": [ + "# set up a matrix X containing the independent variables from the training data\n", + "X_training = df_training.loc[:,independent_cols]\n", + "print(X_training.head())\n", + "print(X_training.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 ale\n", + "1 ale\n", + "2 ale\n", + "3 ale\n", + "4 ale\n", + "Name: style, dtype: object\n", + "(124,)\n" + ] + } + ], + "source": [ + "# Set up a vector y containing the dependent variable / target attribute for the training data\n", + "y_training = df_training.loc[:,dependent_col]\n", + "print(y_training.head())\n", + "print(y_training.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Value counts\n", + "lager 44\n", + "ale 42\n", + "stout 38\n", + "Name: style, dtype: int64\n", + "\n", + "Normalised value counts (i.e. 
probabilities)\n", + "lager 0.354839\n", + "ale 0.338710\n", + "stout 0.306452\n", + "Name: style, dtype: float64\n", + "\n", + "Training set entropy: 1.5823126935513492\n" + ] + } + ], + "source": [ + "# compute the entropy of the training dataset\n", + "print(\"Value counts\")\n", + "print(df_training[\"style\"].value_counts())\n", + "print(\"\\nNormalised value counts (i.e. probabilities)\")\n", + "print(df_training[\"style\"].value_counts(normalize=True))\n", + "\n", + "training_entropy = entropy(df_training[\"style\"].value_counts(normalize=True), base=2)\n", + "print(\"\\nTraining set entropy:\",training_entropy)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on training data: 1.0\n" + ] + }, + { + "data": { + "text/plain": [ + "[Text(243.4909090909091, 587.0880000000001, 'X[7] <= 9.42\\nentropy = 1.582\\nsamples = 124\\nvalue = [42, 44, 38]'),\n", + " Text(121.74545454545455, 456.624, 'X[5] <= 9.667\\nentropy = 0.316\\nsamples = 35\\nvalue = [0, 2, 33]'),\n", + " Text(60.872727272727275, 326.1600000000001, 'entropy = 0.0\\nsamples = 33\\nvalue = [0, 0, 33]'),\n", + " Text(182.61818181818182, 326.1600000000001, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]'),\n", + " Text(365.23636363636365, 456.624, 'X[3] <= 3.934\\nentropy = 1.256\\nsamples = 89\\nvalue = [42, 42, 5]'),\n", + " Text(304.3636363636364, 326.1600000000001, 'entropy = 0.0\\nsamples = 38\\nvalue = [0, 38, 0]'),\n", + " Text(426.1090909090909, 326.1600000000001, 'X[5] <= 8.226\\nentropy = 0.847\\nsamples = 51\\nvalue = [42, 4, 5]'),\n", + " Text(304.3636363636364, 195.69600000000003, 'X[4] <= 17.46\\nentropy = 0.863\\nsamples = 7\\nvalue = [0, 2, 5]'),\n", + " Text(243.4909090909091, 65.23200000000008, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]'),\n", + " Text(365.23636363636365, 65.23200000000008, 'entropy = 0.0\\nsamples = 5\\nvalue = [0, 0, 5]'),\n", + " 
Text(547.8545454545455, 195.69600000000003, 'X[7] <= 15.54\\nentropy = 0.267\\nsamples = 44\\nvalue = [42, 2, 0]'),\n", + " Text(486.9818181818182, 65.23200000000008, 'entropy = 0.0\\nsamples = 42\\nvalue = [42, 0, 0]'),\n", + " Text(608.7272727272727, 65.23200000000008, 'entropy = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]')]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn import tree\n", + "from sklearn import metrics\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "# create a model using entropy (information gain) as the split criterion, rather than the default Gini index\n", + "model = DecisionTreeClassifier(criterion=\"entropy\")\n", + "model.fit(X_training, y_training)\n", + "\n", + "# compute the accuracy on the training predictions\n", + "predictions_training = model.predict(X_training)\n", + "accuracy_training = metrics.accuracy_score(y_training, predictions_training)\n", + "print(\"Accuracy on training data:\",accuracy_training)\n", + "\n", + "plt.figure(figsize=(12,12))\n", + "tree.plot_tree(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see above that the entropy at the root node is the same as the overall entropy of the training set, i.e. 1.582" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on training data: 1.0\n" + ] + }, + { + "data": { + "text/plain": [ + "[Text(362.70000000000005, 597.96, 'X[1] <= 0.36\\ngini = 0.665\\nsamples = 124\\nvalue = [42, 44, 38]'),\n", + " Text(223.20000000000002, 489.24, 'X[7] <= 9.42\\ngini = 0.552\\nsamples = 84\\nvalue = [5, 42, 37]'),\n", + " Text(111.60000000000001, 380.52000000000004, 'X[5] <= 9.667\\ngini = 0.111\\nsamples = 34\\nvalue = [0, 2, 32]'),\n", + " Text(55.800000000000004, 271.8, 'gini = 0.0\\nsamples = 32\\nvalue = [0, 0, 32]'),\n", + " Text(167.4, 271.8, 'gini = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]'),\n", + " Text(334.8, 380.52000000000004, 'X[3] <= 3.934\\ngini = 0.34\\nsamples = 50\\nvalue = [5, 40, 5]'),\n", + " Text(279.0, 271.8, 'gini = 0.0\\nsamples = 37\\nvalue = [0, 37, 0]'),\n", + " Text(390.6, 271.8, 'X[5] <= 8.714\\ngini = 0.651\\nsamples = 
13\\nvalue = [5, 3, 5]'),\n", + " Text(279.0, 163.07999999999998, 'X[2] <= 1.615\\ngini = 0.408\\nsamples = 7\\nvalue = [0, 2, 5]'),\n", + " Text(223.20000000000002, 54.360000000000014, 'gini = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]'),\n", + " Text(334.8, 54.360000000000014, 'gini = 0.0\\nsamples = 5\\nvalue = [0, 0, 5]'),\n", + " Text(502.20000000000005, 163.07999999999998, 'X[7] <= 14.94\\ngini = 0.278\\nsamples = 6\\nvalue = [5, 1, 0]'),\n", + " Text(446.40000000000003, 54.360000000000014, 'gini = 0.0\\nsamples = 5\\nvalue = [5, 0, 0]'),\n", + " Text(558.0, 54.360000000000014, 'gini = 0.0\\nsamples = 1\\nvalue = [0, 1, 0]'),\n", + " Text(502.20000000000005, 489.24, 'X[2] <= 0.969\\ngini = 0.141\\nsamples = 40\\nvalue = [37, 2, 1]'),\n", + " Text(446.40000000000003, 380.52000000000004, 'gini = 0.0\\nsamples = 2\\nvalue = [0, 2, 0]'),\n", + " Text(558.0, 380.52000000000004, 'X[7] <= 9.06\\ngini = 0.051\\nsamples = 38\\nvalue = [37, 0, 1]'),\n", + " Text(502.20000000000005, 271.8, 'gini = 0.0\\nsamples = 1\\nvalue = [0, 0, 1]'),\n", + " Text(613.8000000000001, 271.8, 'gini = 0.0\\nsamples = 37\\nvalue = [37, 0, 0]')]" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn import tree\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "# create a model using the default settings (using Gini index)\n", + "model = DecisionTreeClassifier()\n", + "model.fit(X_training, y_training)\n", + "\n", + "# compute the accuracy on the training predictions\n", + "predictions_training = model.predict(X_training)\n", + "accuracy_training = metrics.accuracy_score(y_training, predictions_training)\n", + "print(\"Accuracy on training data:\",accuracy_training)\n", + "\n", + "plt.figure(figsize=(12,12))\n", + "tree.plot_tree(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ct4101", + "language": "python", + "name": "ct4101" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/HousePrices-1NN.xlsx b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/HousePrices-1NN.xlsx similarity index 100% rename from year4/semester1/CT4101: Machine Learning/materials/topic3/HousePrices-1NN.xlsx rename to year4/semester1/CT4101: Machine Learning/materials/topic3/examples/HousePrices-1NN.xlsx diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_test.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_test.csv new file mode 100644 index 00000000..784cebfe --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_test.csv 
@@ -0,0 +1,31 @@ +calorific_value,nitrogen,turbidity,style,alcohol,sugars,bitterness,beer_id,colour,degree_of_fermentation +41.72123894,0.503275756,2.628181818,ale,4.015384615,16.73,10.45278947,93,13.44,55.33714286 +42.42920354,0.52551218,1.776363636,ale,4.092307692,16.72,10.99952632,103,12.24,58.38 +45.88053097,0.443232788,2.628181818,ale,4.276923077,16.68,13.45636842,178,10.92,58.38285714 +45.30530973,0.471667792,1.806363636,ale,4.126153846,18.84,9.202736842,166,10.92,58.52571429 +38.97787611,0.392845762,2.272727273,ale,4.015384615,16.77,9.457894737,44,10.56,58.9 +41.1460177,0.396443611,0.885454545,ale,4.021538462,16.5,13.02610526,78,14.16,59.41428571 +45.39380531,0.466171648,1.980909091,ale,4.563076923,16.64,11.44852632,170,12.96,60.01857143 +38.2699115,0.472478164,2.099090909,ale,4.313846154,16.68,12.32826316,31,12.48,60.35571429 +43.53539823,0.435870902,1.342727273,ale,4.061538462,16.78,10.81915789,122,12.6,60.46571429 +37.29646018,0.367897245,2.104545455,ale,4.326153846,16.63,13.30068421,4,13.08,61.84571429 +42.29646018,0.250667315,1.019090909,lager,3.806153846,15.94,2.924894737,101,12.6,37.03428571 +44.86283186,0.185199475,2.102727273,lager,4.009230769,15.9,6.608789474,155,14.28,46.39857143 +40.88053097,0.264555015,1.06,lager,3.716923077,17.08,7.411210526,69,15.24,47.32857143 +40.52654867,0.237383712,1.767272727,lager,4.033846154,16.01,13.27905263,64,13.44,48.32857143 +41.27876106,0.31818079,1.366363636,lager,3.756923077,16.19,8.915368421,81,15.36,50.07857143 +42.5619469,0.238442508,0.774545455,lager,3.526153846,18.74,8.155894737,107,9,51.59714286 +41.58849558,0.211766681,1.461818182,lager,3.587692308,16.88,5.340684211,89,14.76,52.15428571 +39.24336283,0.156837113,0.689090909,lager,3.636923077,16.72,6.820842105,46,11.28,53.18857143 +45.0840708,0.298799689,1.331818182,lager,4.206153846,16.25,10.28584211,162,14.76,53.38285714 +43.97787611,0.271177799,1.419090909,lager,3.849230769,16.73,9.530947368,131,12.48,53.57428571 
+37.96017699,0.228401767,1.559090909,stout,3.950769231,17.96,5.386315789,19,10.68,71.69857143 +37.34070796,0.221810849,1.638181818,stout,3.855384615,20.51,5.596842105,6,9.84,73.13714286 +38.13716814,0.233793401,4.762727273,stout,4.147692308,16.67,6.252105263,23,6.84,73.39428571 +41.45575221,0.260993144,1.352727273,stout,3.978461538,17.81,3.865789474,85,9.24,74.15714286 +44.02212389,0.172410662,2.191818182,stout,4.033846154,16.9,7.546947368,134,7.32,74.67857143 +39.95132743,0.272586785,5.217272727,stout,4.412307692,16.68,6.008368421,54,6.84,74.75714286 +40.03982301,0.347918901,2.859090909,stout,4.178461538,17.58,7.340842105,56,8.88,74.88714286 +43.97787611,0.266770262,3.775454545,stout,4.347692308,19.1,7.356,132,7.32,75.89428571 +45.34955752,0.255529552,1.301818182,stout,4.16,18.17,3.243578947,169,10.68,76.18285714 +41.19026549,0.283402606,2.620909091,stout,4.123076923,19.6,7.554947368,80,8.04,79.13428571 diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_training.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_training.csv new file mode 100644 index 00000000..c8ae9e95 --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/beer_training.csv @@ -0,0 +1,125 @@ +calorific_value,nitrogen,turbidity,style,alcohol,sugars,bitterness,beer_id,colour,degree_of_fermentation +45.30530973,0.459548179,1.917272727,ale,4.227692308,16.67,12.56894737,167,11.04,62.17857143 +43.88938053,0.548977011,3.186363636,ale,4.289230769,16.73,14.974,128,13.44,63.03285714 +41.58849558,0.542846961,1.568181818,ale,4.344615385,16.48,11.84878947,88,14.04,63.46857143 +44.55309735,0.480300917,1.871818182,ale,4.424615385,18.59,13.87963158,147,12.48,63.53142857 +41.01327434,0.441860465,2.345454545,ale,4.264615385,16.35,12.18605263,74,12.12,63.74714286 +42.78318584,0.360463403,1.889090909,ale,4.172307692,16.71,15.09373684,109,11.4,63.91285714 
+44.77433628,0.431959526,1.141818182,ale,4.218461538,16.86,12.82273684,153,13.32,64.03428571 +44.15486726,0.493446774,2.443636364,ale,4.252307692,16.75,14.17294737,138,12.12,65.62571429 +37.60619469,0.485480107,2.554545455,ale,4.338461538,17.02,13.26252632,12,12.84,65.82857143 +45.83628319,0.520862387,1.886363636,ale,4.230769231,16.73,11.50336842,177,13.8,66.47 +44.95132743,0.586365521,2.264545455,ale,4.369230769,16.76,14.82478947,158,12.6,66.75 +40.26106195,0.609585356,2.346363636,ale,4.421538462,16.95,11.819,60,10.32,67.55714286 +37.42920354,0.429247433,2.240909091,ale,4.378461538,16.71,10.01231579,8,12.48,67.81 +44.46460177,0.363074414,1.814545455,ale,4.147692308,16.81,9.659052632,143,12.48,67.89857143 +37.73893805,0.571386474,1.659090909,ale,4.427692308,16.87,12.812,14,12.24,67.95 +43.44690265,0.573324173,2.862727273,ale,4.221538462,16.43,13.74542105,120,10.68,68.07142857 +41.67699115,0.295421137,1.814545455,ale,4.375384615,18.99,10.31873684,91,10.68,68.42714286 +44.77433628,0.450176637,1.605454545,ale,4.372307692,19.04,13.32026316,154,10.44,68.45142857 +45.21681416,0.742774319,2.853636364,ale,4.366153846,16.59,12.49878947,164,14.76,68.67428571 +37.51769912,0.435485418,1.812727273,ale,4.172307692,16.73,10.67063158,10,11.76,69.16285714 +43.09292035,0.335413804,1.754545455,ale,4.196923077,18.1,11.07721053,116,11.52,69.43428571 +42.11946903,0.391087955,1.369090909,ale,3.953846154,16.6,10.24173684,100,13.08,69.95428571 +44.06637168,0.454088014,2.124545455,ale,4.270769231,16.89,11.19389474,136,10.56,70.21571429 +45.03982301,0.532171628,1.756363636,ale,4.326153846,17.15,11.62310526,160,12.72,70.87 +40.21681416,0.505285412,1.355454545,ale,4.12,16.77,13.74557895,59,11.04,70.88285714 +38.2699115,0.489920882,1.625454545,ale,4.015384615,16.65,9.954052632,30,13.44,70.99285714 +44.02212389,0.495629469,2.28,ale,4.255384615,16.57,14.01368421,133,13.56,71.05428571 +44.42035398,0.382415425,1.09,ale,4.153846154,16.81,9.406789474,139,13.44,71.47 
+38.84513274,0.562436395,2.256363636,ale,4.255384615,16.65,10.42689474,41,14.88,71.54285714 +41.45575221,0.554015714,2.251818182,ale,4.089230769,16.97,10.48668421,84,12.84,72.56 +38.18141593,0.50525286,2.132727273,ale,4.224615385,16.5,10.18026316,24,14.28,73.1 +40.65929204,0.46462286,1.187272727,ale,4.156923077,16.8,12.57984211,66,13.2,73.17857143 +39.28761062,0.551372151,1.583636364,ale,4.233846154,16.53,8.651894737,47,15,73.21428571 +44.90707965,0.475125154,1.575454545,ale,4.049230769,17.36,14.35978947,156,12.36,73.43285714 +43.62389381,0.349082206,1.446363636,ale,3.978461538,18.8,9.628894737,125,12.36,74.06428571 +44.50884956,0.575722739,2.030909091,ale,4.236923077,16.9,12.54373684,146,13.56,74.18857143 +38.18141593,0.581744854,1.984545455,ale,4.4,16.92,10.52347368,25,12.84,74.35428571 +38.04867257,0.56120456,2.814545455,ale,4.193846154,16.81,12.49468421,20,15.36,75.67142857 +39.28761062,0.383650687,1.316363636,ale,4.267692308,16.9,9.522789474,48,15,75.82857143 +38.71238938,0.308351123,1.650909091,ale,4.073846154,17.59,12.60215789,38,12.48,78.14714286 +37.96017699,0.397214579,2.142727273,ale,4.086153846,16.64,11.25536842,18,13.08,78.70857143 +41.8539823,0.312053481,1.081818182,ale,4.015384615,17.05,10.78168421,95,13.56,87.02571429 +41.94247788,0.183229909,1.411818182,lager,3.806153846,16.17,7.673684211,96,13.44,53.66857143 +38.53539823,0.257081082,1.250909091,lager,3.876923077,16.34,6.847578947,36,12.48,54.01428571 +44.55309735,0.219026799,0.939090909,lager,3.636923077,16.47,6.328421053,148,11.4,54.53 +45.52654867,0.189464469,1.750909091,lager,3.889230769,16.36,6.581526316,172,11.76,54.91142857 +45.70353982,0.137596499,0.891818182,lager,3.76,16.29,10.27136842,176,10.32,54.95142857 +39.90707965,0.107012723,0.779090909,lager,3.692307692,15.92,7.479684211,53,16.56,55.91428571 +41.58849558,0.341122392,1.469090909,lager,3.781538462,16.61,6.477789474,90,10.872,60.89857143 +42.03097345,0.282453802,1.708181818,lager,3.913846154,16.81,12.10615789,98,13.92,61.05714286 
+40.34955752,0.197146734,0.629090909,lager,3.904615385,16.53,5.315473684,62,11.52,61.59142857 +44.11061947,0.206482297,1.352727273,lager,3.898461538,15.98,8.277157895,137,14.76,61.62285714 +40.17256637,0.141298858,0.690909091,lager,3.393846154,16.51,9.779631579,58,20.52,61.91428571 +38.18141593,0.169096529,1.452727273,lager,3.824615385,16.53,13.81763158,27,8.28,61.94428571 +43.53539823,0.183369712,1.58,lager,4.150769231,16.66,7.230315789,124,11.76,62.30857143 +38.53539823,0.286716054,1.259090909,lager,3.793846154,16.1,4.326368421,35,15,62.56 +38.97787611,0.167299317,0.290909091,lager,3.806153846,16.63,10.85815789,45,10.68,62.64285714 +40.74778761,0.109410604,1.541818182,lager,3.575384615,16.99,11.21652632,67,13.92,63.30285714 +42.29646018,0.12751978,1.431818182,lager,3.821538462,17.55,9.598736842,102,10.32,63.34714286 +37.87168142,0.238338684,0.672727273,lager,3.716923077,16.33,5.665842105,16,12.84,63.41428571 +44.46460177,0.205390949,0.662727273,lager,3.716923077,16.83,5.123684211,144,12.96,63.44 +37.38495575,0.176077042,1.130909091,lager,3.741538462,16.61,9.554789474,7,15.96,64.65571429 +45.0840708,0.390027104,0.847272727,lager,3.68,16.09,11.42347368,161,11.88,64.72857143 +40.0840708,0.164977505,0.534545455,lager,3.907692308,18.87,9.168157895,57,14.28,65.8 +41.94247788,0.352781138,0.772727273,lager,4.104615385,15.94,5.924736842,97,12.24,65.94857143 +41.41150442,0.202430433,0.959090909,lager,3.692307692,16.51,6.085526316,83,12.6,66.08285714 +38.80088496,0.283330649,1.265454545,lager,3.704615385,19.3,6.477631579,40,9.48,66.17428571 +41.76548673,0.143658705,1.118181818,lager,3.652307692,19.31,9.866684211,94,9,66.27285714 +38.93362832,0.194925319,0.758181818,lager,3.796923077,17.45,11.17257895,43,9.6,67.04857143 +40.96902655,0.246442412,0.701818182,lager,3.523076923,17.4,10.43847368,72,9.6,67.34 +45.26106195,0.138479515,0.864545455,lager,3.716923077,16.39,11.46742105,165,11.16,67.55714286 
+43.35840708,0.31183761,0.928181818,lager,3.581538462,17.06,6.081105263,118,12,67.84857143 +41.41150442,0.19404436,0.77,lager,3.510769231,15.74,7.862578947,82,13.2,68.72857143 +42.78318584,0.28745447,0.65,lager,3.716923077,16.13,4.950631579,110,15.72,68.91285714 +43.35840708,0.144433099,1.222727273,lager,3.744615385,16.45,10.37236842,119,17.4,70.13857143 +37.34070796,0.265611069,1.202727273,lager,3.806153846,16.21,9.162368421,5,14.28,70.23428571 +39.46460177,0.197993085,0.619090909,lager,3.643076923,15.89,11.74573684,49,9.48,70.68857143 +43.04867257,0.197672705,1.229090909,lager,4.264615385,16.51,13.65547368,113,16.32,73.00428571 +37.07522124,0.396231167,0.738181818,lager,3.996923077,16.67,11.74847368,1,15.72,73.02857143 +39.90707965,0.241683999,1.155454545,lager,3.584615385,16.67,6.359421053,52,16.32,73.16857143 +38.75663717,0.25024928,0.793636364,lager,3.572307692,16.35,13.31515789,39,11.52,73.98571429 +39.59734513,0.180265967,0.856363636,lager,3.821538462,19.43,9.708263158,50,11.04,74.42428571 +38.18141593,0.196229796,0.940909091,lager,3.627692308,17.13,11.25978947,26,11.64,76.31714286 +43.49115044,0.258598004,1.139090909,lager,3.633846154,17.12,8.174263158,121,11.4,76.78 +38.09292035,0.261521856,1.731818182,lager,3.618461538,17.68,7.556157895,21,14.76,81.21142857 +45.70353982,0.22417001,2.661818182,lager,3.556923077,17.05,20.06378947,175,11.16,87.23857143 +37.65044248,0.244681179,2.413636364,stout,3.870769231,16.29,3.219052632,13,6.96,57.21428571 +38.62389381,0.293868922,3.001818182,stout,3.876923077,17.46,2.730210526,37,8.76,60.37142857 +37.42920354,0.359055102,1.194545455,stout,4.049230769,18.57,4.336578947,9,7.2,60.70714286 +44.59734513,0.394747482,2.485454545,stout,3.769230769,18.88,4.523263158,149,7.8,60.88571429 +41.01327434,0.267123194,2.012727273,stout,4.150769231,18.59,6.593263158,75,9.72,61.93 +44.02212389,0.169803078,1.311818182,stout,4.270769231,20.04,6.159263158,135,6.96,62.03857143 
+43.04867257,0.2685709,3.980909091,stout,3.944615385,18.37,7.098631579,115,8.64,62.47142857 +43.93362832,0.254582118,2.451818182,stout,3.849230769,16.24,5.219052632,130,9,62.60714286 +44.42035398,0.295698685,3.503636364,stout,4.24,17.76,7.318526316,142,8.4,62.61428571 +38.35840708,0.282170771,4.255454545,stout,4.070769231,18.3,4.046684211,33,6.72,62.84571429 +44.68584071,0.185837494,2.837272727,stout,4.224615385,19.36,6.667526316,152,9.36,62.84857143 +45.57079646,0.168747709,3.989999636,stout,3.929230769,17.39,6.265210526,174,6.84,62.93142857 +44.42035398,0.331995847,3.549090909,stout,4.083076923,19.28,4.823210526,141,7.08,62.93428571 +42.47345133,0.296829438,2.902727273,stout,4.052307692,20.19,3.847736842,104,7.2,63.66857143 +45.34955752,0.280161115,1.800909091,stout,3.956923077,16.35,4.348684211,168,9.12,63.66857143 +40.92477876,0.256869323,1.65,stout,4.190769231,19.95,7.374736842,71,10.92,63.90714286 +40.03982301,0.303067424,1.686363636,stout,4.110769231,17.56,3.55,55,8.4,64.10714286 +43.13716814,0.20528644,2.611818182,stout,3.803076923,18.83,3.167789474,117,6.72,64.31714286 +42.73893805,0.259444355,2.961818182,stout,4.098461538,18.24,5.145473684,108,6.6,64.46 +43.93362832,0.248870104,2.31,stout,3.753846154,18.03,3.413736842,129,7.92,65.15428571 +42.82743363,0.205390949,3.226363636,stout,4.258461538,19.12,7.099315789,112,6.84,65.23142857 +42.82743363,0.313775309,2.36,stout,3.987692308,18.45,3.801578947,111,8.16,65.33571429 +38.44690265,0.230198978,2.308181818,stout,3.907692308,18.55,8.877894737,34,9.36,65.43428571 +44.68584071,0.334920385,3.651818182,stout,4.052307692,17.59,5.002736842,151,7.2,65.64714286 +41.1460177,0.270822126,1.742727273,stout,3.941538462,17.31,5.800052632,79,7.92,65.85714286 +41.45575221,0.262541932,3.018181818,stout,4.024615385,18.9,9.466894737,86,6.84,66.57714286 +43.04867257,0.180269393,4.047272727,stout,3.935384615,17.67,5.900210526,114,5.76,67.10285714 +43.84513274,0.31127566,3.05,stout,4.218461538,20.65,5.469947368,127,7.68,67.15 
+42.03097345,0.245981545,1.543636364,stout,3.963076923,17.99,8.782,99,8.88,67.51428571 +44.95132743,0.304969144,2.744545455,stout,4.123076923,18.91,6.272368421,159,8.4,68.16 +38.18141593,0.263215244,1.828181818,stout,4.212307692,18.26,5.262315789,29,11.52,68.80857143 +40.88053097,0.292846105,3.717272727,stout,4.356923077,17.51,4.064736842,70,7.44,69.30285714 +37.51769912,0.237528312,3.121818182,stout,3.96,19.61,3.751842105,11,6.48,69.70285714 +41.05752212,0.310112356,1.44,stout,3.769230769,19.72,5.720157895,77,9,70.23714286 +41.5,0.275860828,2.229090909,stout,3.953846154,18.27,4.228421053,87,10.44,70.68857143 +43.71238938,0.233296555,2.627272727,stout,4.153846154,18.12,6.288052632,126,7.08,70.94 +38.13716814,0.328714951,3.753636364,stout,4.138461538,18.7,7.799368421,22,10.2,71.17142857 +42.51769912,0.352644077,2.937272727,stout,3.830769231,18.03,6.692736842,106,8.04,71.45142857 diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/college_athletes.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/college_athletes.csv new file mode 100644 index 00000000..324816e5 --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/college_athletes.csv @@ -0,0 +1,22 @@ +ID,Speed,Agility,Draft +1,2.50,6.00,no +2,3.75,8.00,no +3,2.25,5.50,no +4,3.25,8.25,no +5,2.75,7.50,no +6,4.50,5.00,no +7,3.50,5.25,no +8,3.00,3.25,no +9,4.00,4.00,no +10,4.25,3.75,no +11,2.00,2.00,no +12,5.00,2.50,no +13,8.25,8.5,no +14,5.75,8.75,yes +15,4.75,6.25,yes +16,5.50,6.75,yes +17,5.25,9.50,yes +18,7.00,4.25,yes +19,7.50,8.00,yes +20,7.25,5.75,yes +21,6.75,3.00,yes \ No newline at end of file diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_test.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_test.csv new file mode 100644 index 00000000..381e98be --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_test.csv @@ -0,0 +1,46 
@@ +sepal_length,sepal_width,petal_length,petal_width,class +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica \ No newline at end of file diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_training.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_training.csv new file mode 100644 index 00000000..1d50c62a --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/iris_training.csv @@ -0,0 +1,106 @@ +sepal_length,sepal_width,petal_length,petal_width,class +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa 
+5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor 
+6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/k-NN_hyperparameters.ipynb b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/k-NN_hyperparameters.ipynb new file mode 100644 index 00000000..c7ebc525 --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/k-NN_hyperparameters.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this worked example, we seek to apply the k-NN algorithm to a dataset called beers.\n", + "\n", + "The dataset has already been split into two different sets: one for training (beer_training.csv) and one for testing (beer_test.csv)\n", + "\n", + "The dependent variable that we are 
trying to predict is style, which can be one of 3 classes: ale, lager or stout.\n", + "\n", + "In this example we will see the importance of maintaining separate training and test data, as well as how to tune the hyperparameters of a machine learning model." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity style alcohol sugars bitterness \\\n", + "0 45.305310 0.459548 1.917273 ale 4.227692 16.67 12.568947 \n", + "1 43.889381 0.548977 3.186364 ale 4.289231 16.73 14.974000 \n", + "2 41.588496 0.542847 1.568182 ale 4.344615 16.48 11.848789 \n", + "3 44.553097 0.480301 1.871818 ale 4.424615 18.59 13.879632 \n", + "4 41.013274 0.441860 2.345455 ale 4.264615 16.35 12.186053 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 167 11.04 62.178571 \n", + "1 128 13.44 63.032857 \n", + "2 88 14.04 63.468571 \n", + "3 147 12.48 63.531429 \n", + "4 74 12.12 63.747143 \n", + "(124, 10)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# details for iris dataset - this is a very simple dataset that is easy to get good results on\n", + "# training_file = \"iris_training.csv\"\n", + "# test_file = \"iris_test.csv\"\n", + "# independent_cols = [\"sepal_length\",\"sepal_width\",\"petal_length\",\"petal_width\"]\n", + "# dependent_col = \"class\"\n", + "\n", + "# details for beer dataset\n", + "training_file = \"beer_training.csv\"\n", + "test_file = \"beer_test.csv\"\n", + "independent_cols = [\"calorific_value\", \"nitrogen\", \"turbidity\", \"alcohol\", \"sugars\", \"bitterness\", \"beer_id\", \n", + " \"colour\", \"degree_of_fermentation\"]\n", + "dependent_col = \"style\"\n", + "\n", + "# Here we load our training dataset in from the training file using the pandas library\n", + "df_training = pd.read_csv(training_file)\n", + "print(df_training.head())\n", + "print(df_training.shape)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity alcohol sugars bitterness \\\n", + "0 45.305310 0.459548 1.917273 4.227692 16.67 12.568947 \n", + "1 43.889381 0.548977 3.186364 4.289231 16.73 14.974000 \n", + "2 41.588496 0.542847 1.568182 4.344615 16.48 11.848789 \n", + "3 44.553097 0.480301 1.871818 4.424615 18.59 13.879632 \n", + "4 41.013274 0.441860 2.345455 4.264615 16.35 12.186053 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 167 11.04 62.178571 \n", + "1 128 13.44 63.032857 \n", + "2 88 14.04 63.468571 \n", + "3 147 12.48 63.531429 \n", + "4 74 12.12 63.747143 \n", + "(124, 9)\n" + ] + } + ], + "source": [ + "# set up a matrix X containing the independent variables from the training data\n", + "X_training = df_training.loc[:,independent_cols]\n", + "print(X_training.head())\n", + "print(X_training.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 ale\n", + "1 ale\n", + "2 ale\n", + "3 ale\n", + "4 ale\n", + "Name: style, dtype: object\n", + "(124,)\n" + ] + } + ], + "source": [ + "# Set up a vector y containing the dependent variable / target attribute for the training data\n", + "y_training = df_training.loc[:,dependent_col]\n", + "print(y_training.head())\n", + "print(y_training.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity style alcohol sugars bitterness \\\n", + "0 41.721239 0.503276 2.628182 ale 4.015385 16.73 10.452789 \n", + "1 42.429204 0.525512 1.776364 ale 4.092308 16.72 10.999526 \n", + "2 45.880531 0.443233 2.628182 ale 4.276923 16.68 13.456368 \n", + "3 45.305310 0.471668 1.806364 ale 4.126154 18.84 9.202737 \n", + "4 
38.977876 0.392846 2.272727 ale 4.015385 16.77 9.457895 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 93 13.44 55.337143 \n", + "1 103 12.24 58.380000 \n", + "2 178 10.92 58.382857 \n", + "3 166 10.92 58.525714 \n", + "4 44 10.56 58.900000 \n", + "(30, 10)\n" + ] + } + ], + "source": [ + "# Next we load our test dataset in from the test file specified above\n", + "df_test = pd.read_csv(test_file)\n", + "print(df_test.head())\n", + "print(df_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " calorific_value nitrogen turbidity alcohol sugars bitterness \\\n", + "0 41.721239 0.503276 2.628182 4.015385 16.73 10.452789 \n", + "1 42.429204 0.525512 1.776364 4.092308 16.72 10.999526 \n", + "2 45.880531 0.443233 2.628182 4.276923 16.68 13.456368 \n", + "3 45.305310 0.471668 1.806364 4.126154 18.84 9.202737 \n", + "4 38.977876 0.392846 2.272727 4.015385 16.77 9.457895 \n", + "\n", + " beer_id colour degree_of_fermentation \n", + "0 93 13.44 55.337143 \n", + "1 103 12.24 58.380000 \n", + "2 178 10.92 58.382857 \n", + "3 166 10.92 58.525714 \n", + "4 44 10.56 58.900000 \n", + "(30, 9)\n" + ] + } + ], + "source": [ + "# set up a matrix X containing the independent variables from the test data\n", + "X_test = df_test.loc[:,independent_cols]\n", + "print(X_test.head())\n", + "print(X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 ale\n", + "1 ale\n", + "2 ale\n", + "3 ale\n", + "4 ale\n", + "Name: style, dtype: object\n", + "(30,)\n" + ] + } + ], + "source": [ + "# Set up a vector y containing the dependent variable / target attribute for the test data\n", + "y_test = df_test.loc[:,dependent_col]\n", + "print(y_test.head())\n", + "print(y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "To explore the effect of hyperparameters on a simple machine learning model, let's experiment with the built-in k-NN implementation in scikit-learn.\n", + "\n", + "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html\n", + "\n", + "First we'll create a model using the default settings" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on training data: 0.7580645161290323\n", + "Accuracy on test data: 0.4\n" + ] + } + ], + "source": [ + "from sklearn import neighbors, metrics\n", + "\n", + "# create a model using the default settings for k-NN, n_neighbors=5, weights=uniform, p=2 (Euclidean distance)\n", + "model = neighbors.KNeighborsClassifier()\n", + "model.fit(X_training, y_training)\n", + "\n", + "# compute the predictions for the training and test sets\n", + "predictions_training = model.predict(X_training)\n", + "predictions_test = model.predict(X_test)\n", + "\n", + "# compute the accuracy on the training and test set predictions\n", + "accuracy_training = metrics.accuracy_score(y_training, predictions_training)\n", + "accuracy_test = metrics.accuracy_score(y_test, predictions_test)\n", + "print(\"Accuracy on training data:\",accuracy_training)\n", + "print(\"Accuracy on test data:\",accuracy_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29]\n", + "[1.0, 0.8306451612903226, 0.7580645161290323, 0.6854838709677419, 0.6370967741935484, 0.6048387096774194, 0.5806451612903226, 0.5967741935483871, 0.5645161290322581, 0.5241935483870968, 0.5161290322580645, 0.46774193548387094, 0.46774193548387094, 0.46774193548387094, 0.43548387096774194]\n", + "[0.5666666666666667, 0.4666666666666667, 0.4, 0.4, 0.4, 0.43333333333333335, 
0.4, 0.3333333333333333, 0.43333333333333335, 0.4, 0.4, 0.4, 0.36666666666666664, 0.3, 0.3333333333333333]\n" + ] + } + ], + "source": [ + "# Now let's evaluate the effect of using different k values\n", + "# start at k=1 and test all odd k values up to 29\n", + "k_values = list(range(1,31,2))\n", + "print(k_values)\n", + "\n", + "accuracy_training_k = []\n", + "accuracy_test_k = []\n", + "for k in k_values:\n", + " model_k = neighbors.KNeighborsClassifier(k)\n", + " model_k.fit(X_training, y_training)\n", + "\n", + " # compute the predictions for the training and test sets\n", + " predictions_training_k = model_k.predict(X_training)\n", + " predictions_test_k = model_k.predict(X_test)\n", + "\n", + " # compute the accuracy on the training and test set predictions\n", + " accuracy_training_k.append(metrics.accuracy_score(y_training, predictions_training_k))\n", + " accuracy_test_k.append(metrics.accuracy_score(y_test, predictions_test_k))\n", + "\n", + "print(accuracy_training_k)\n", + "print(accuracy_test_k)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# let's plot the accuracy on the training and test set\n", + "import matplotlib.pyplot as plt\n", + "plt.scatter(k_values,accuracy_training_k,marker=\"x\")\n", + "plt.scatter(k_values,accuracy_test_k,marker=\"+\")\n", + "plt.xlim([0, max(k_values)+2])\n", + "plt.ylim([0.0, 1.1])\n", + "plt.xlabel(\"Value of k\")\n", + "plt.ylabel(\"Accuracy\")\n", + "legend_labels = [\"Training (Euclidian dist.)\",\"Test (Euclidian dist.)\"]\n", + "plt.legend(labels=legend_labels, loc=4, borderpad=1)\n", + "plt.title(\"Effect of k on training and test set accuracy\", fontsize=10)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p = 1 training [1.0, 0.8306451612903226, 0.8064516129032258, 0.75, 0.75, 0.717741935483871, 0.7016129032258065, 0.6854838709677419, 0.6451612903225806, 0.6854838709677419, 0.6451612903225806, 0.5806451612903226, 0.5564516129032258, 0.5645161290322581, 0.5645161290322581] \n", + "\n", + "p = 1 test [0.7, 0.7, 0.5333333333333333, 0.43333333333333335, 0.4666666666666667, 0.4, 0.5, 0.4666666666666667, 0.4666666666666667, 0.5, 0.4666666666666667, 0.5, 0.43333333333333335, 0.5, 0.4666666666666667] \n", + "\n", + "p = 2 training [1.0, 0.8306451612903226, 0.7580645161290323, 0.6854838709677419, 0.6370967741935484, 0.6048387096774194, 0.5806451612903226, 0.5967741935483871, 0.5645161290322581, 0.5241935483870968, 0.5161290322580645, 0.46774193548387094, 0.46774193548387094, 0.46774193548387094, 0.43548387096774194] \n", + "\n", + "p = 2 test [0.5666666666666667, 0.4666666666666667, 0.4, 0.4, 0.4, 0.43333333333333335, 0.4, 0.3333333333333333, 0.43333333333333335, 0.4, 0.4, 0.4, 0.36666666666666664, 0.3, 0.3333333333333333] \n", + "\n", + "p = 3 training [1.0, 0.8064516129032258, 0.7096774193548387, 0.6774193548387096, 
0.6290322580645161, 0.6129032258064516, 0.5483870967741935, 0.5403225806451613, 0.5161290322580645, 0.49193548387096775, 0.4596774193548387, 0.43548387096774194, 0.4435483870967742, 0.4435483870967742, 0.41935483870967744] \n", + "\n", + "p = 3 test [0.5333333333333333, 0.36666666666666664, 0.4, 0.3333333333333333, 0.36666666666666664, 0.4, 0.3333333333333333, 0.3333333333333333, 0.4, 0.43333333333333335, 0.4, 0.36666666666666664, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333] \n", + "\n", + "p = 4 training [1.0, 0.7983870967741935, 0.6774193548387096, 0.6532258064516129, 0.6370967741935484, 0.5887096774193549, 0.5403225806451613, 0.5080645161290323, 0.5080645161290323, 0.47580645161290325, 0.45161290322580644, 0.41935483870967744, 0.4435483870967742, 0.41935483870967744, 0.4274193548387097] \n", + "\n", + "p = 4 test [0.5333333333333333, 0.3333333333333333, 0.43333333333333335, 0.3, 0.3, 0.4, 0.3333333333333333, 0.36666666666666664, 0.4, 0.4, 0.4, 0.3333333333333333, 0.3, 0.3333333333333333, 0.3] \n", + "\n" + ] + } + ], + "source": [ + "# Now let's explore the impact of using a different distance metric by changing the value of p used in the Minkowski formula\n", + "p_values = list(range(1,5))\n", + "# print(p_values)\n", + "\n", + "accuracy_training_k_p = []\n", + "accuracy_test_k_p = []\n", + "for j in range(len(p_values)):\n", + " accuracy_training_k_p.append([])\n", + " accuracy_test_k_p.append([]) \n", + "\n", + " for k in k_values:\n", + " model_k_p = neighbors.KNeighborsClassifier(n_neighbors=k, p=p_values[j])\n", + " model_k_p.fit(X_training, y_training)\n", + "\n", + " # compute the predictions for the training and test sets\n", + " predictions_training_k_p = model_k_p.predict(X_training)\n", + " predictions_test_k_p = model_k_p.predict(X_test)\n", + "\n", + " # compute the accuracy on the training and test set predictions\n", + " accuracy_training_k_p[j].append(metrics.accuracy_score(y_training, predictions_training_k_p))\n", + " 
accuracy_test_k_p[j].append(metrics.accuracy_score(y_test, predictions_test_k_p))\n", + "\n", + " print(\"p =\",p_values[j],\"training\",accuracy_training_k_p[j],\"\\n\")\n", + " print(\"p =\",p_values[j],\"test\",accuracy_test_k_p[j],\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEUCAYAAAAmxTHXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAxeUlEQVR4nO3dfXgU5fXw8e8hRAgQQIgoihqsKIS3BKKiUQxFWyooVFFBfEOr4g/BYgURFfnRxxatltYHrQ+0Yn1XVASRim+kYJRCEERAVJRYYxGBSghvEuA8f8zsugmbZJPsZDOZ87muvbI7O3vPmZnsnp257z0jqooxxpjgapToAIwxxiSWJQJjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiAs0TQgInIQRFZHXGb6E4/R0TWudNSROQP7uM/1GAZk2rwmktF5BMRWVxueq6ILKhue9VctufLqGLZZ9XwtekickWc4vi1iDSLR1umYWic6ACMp/aqamaU6SOA36vq0wAiciPQRlUP1mAZk4DfVfM11wM3qOp7NVien+UCu4D3a/DadOAK4Nk4xPFr4GlgTxzaqhERaayqBxK1fFOWHREEjIj8CrgM+K2IPCMi84EWwEoRuVxEjhKRl0VkhXvLcV/XQkRmi8jHIrJGRC4RkWlAintk8UyUZQ13518rIve70yYDZwN/q+wIREROE5FVIvKTctPTRWSpiHzo3s5yp+eKSJ6IvCQiG9x1E/e5Ae60D4GLK1jetSIyz23jcxG5t4L5Dlsnd/ouEblPRD4SkWUicnT5uIFRwDh3e51TybY+N+IobpWIpALTgHPcaePKtd1eRJa4z60VkXPc6T8TkQ/c7TTH3YdjgWOBxeWPyEL7x41lrYjMjNiGJ4vI2+76fRjaLyJyh7s9PnL/H3C3YbZ7P01ECiO28XwReRd4x43nHbe9j0VkcEQcV7v/Zx+JyFMikioim0Qk2X2+ZeRjU0uqarcGegMOAqsjbpe7058AhkbMtyvi/rPA2e79E4BP3Pv3A3+KmO/I8q8tt+xjgX8DR+Eceb4LDHGfywOyo7wmF1gAnAWsBE6IMk8zoKl7vxNQEPHaYqADzhecD3ASTlPga3deAV4EFkRp91pgM9AWSAHWlo+xinVS4EL3/gPA3VGWMQW4PYZt/RqQ495v4S4rN1rc7jy/Ae5y7ycBqUAasARo7k6/A5js3i8E0ipoq03E/aci1ulfwC/d+03d/fALnKObZpGvjdy/bhyFEdu4KGK+xkDLiPk2uvuoK/BZKMaI+WdHbO8bgYcS/R5rKDc7NdSwVXRqqDLnARnuF0GAliLSwp0+LDRRVb+vop3TgDxV3QrgHjH0BV6t4nVdgJnAz1T1P1GeTwZmiEgmTqI7JeK55apa5C5vNc7plF3AJlX93J3+NM6HSDRvqep2d75XcBJJQYzrtB8niYGTxM6vYj2h4m2dD/zRbf8VVS2KmCeaFcDj7rfjV1V1tYicC2QA+e5rj8BJjlXpJyITcD7o2wDrRCQPOE5V5wKo6j4AETkPmK2qe9zp/42h/bci5hPgdyLSFzgEHAccDfwUmKOq28q1+1dgAs72HgncEMPyTAwsEZjyGgF9Qm/2kCo+iOJpM843ziwgWiIYB2wBeuLEGhnnDxH3D1L9/+/yhbeqU4irVN2vqtVYdtRtDUwTkdeBC3A+yH9eWSOqusT9MB0IPCEifwS+x/nQHR7rCohIU+BRnG/zX4vIFJx9UV0H+PG0c/nX7464PwLn6Kq3qpa6p5AqXJ6q5runBnOBJFVdW4PYTBTWR2DKexMYE3rgfvMGeAsYHTH9SPduaQXnaZcD57rniJOA4cA/Y1j+DpwPtN+7b/jyWgGbV
fUQcBXOqZDKbADS5ce+hso+GM8XkTYikgIMwflmHqmm6xRSgnPaJiTqthaRn6jqx6p6P863/c5RXkvE604EtqjqLJxvzb2AZUCOiJzsztNcREJHTxW1FfoQ3uYemQwFUNUSoEhEhrhtNRFn1NFbwEj3PiLSxn19IdDbvT+0ku3RCvjOTQL9gBPd6e8Cl4pI23LtAjyJc0ptdiXtmmqyRNCwhTpyQ7dpMbxmLJDtdtStx+ngBPg/wJFuJ+JHQD93+kxgjZTrLFbVzcBEYDHwEbBSVefFErSqbgEGAY+IyBnlnn4UuMaNoTNlv2FGa2sfzqmg18XpLP6uktmXAy8Da4CXVTXytFCt1sn1GvBLd1+cQ8Xb+tfudl4DlAL/cGM66HaejivXbi7wkYisAi4H/uyevroWeM5t5wOc7QXOPnujfGexqu4AZuH0jyzCSUIhVwFj3bbeB45R1TeA+UCBeyrudnfeB4Gb3XjSKtkez7jr/zFwNU7SRlXXAfcB/3T38x/LveZI4LlK2jXVJD8ezRoTXCJyLc4pkVsSHYupmIgMBQar6lWJjqUhsT4CY4wviMj/xRmpdEGiY2lo7IjAGGMCzvoIjDEm4CwRGGNMwPmujyAtLU3T09MTHYYxxvjKypUrt6nqUdGe810iSE9Pp6CgoOoZjTHGhInIVxU9Z6eGjDEm4CwRGGNMwFkiMMaYgPNdH4ExQVBaWkpRURH79pWvR2dM5Zo2bUqHDh1ITo79Ug2WCIyph4qKikhNTSU9Pb0uK78an1NVtm/fTlFRER07doz5dXZqyJh6aN++fbRt29aSgKkWEaFt27bVPpK0RGBMPZWIJLB9+3YyMzPJzMzkmGOO4bjjjgs/3r9/f5Wvz8vL4/33K74k86uvvsrUqVMBmDJlCiLCxo0bw8//6U9/QkRqPER8ypQpPPjgg9V6ze9+9+Mlt3fs2MGjjz5ao2V7acaMGZx88smICNu2bQtPX7BgAZMnTy4zb03+bywRGONj5WuF1bZ2WNu2bVm9ejWrV69m1KhRjBs3Lvz4iCOOqPL1VSWCBx54gP/5n/8JP+7evTvPP/98+PGcOXPo2rVrrdahuvyQCHJycnj77bc58cQTy0wfOHAgr732Gnv27KlV+5YIjPGp6W99xtQF68Mf/qrK1AXrmf7WZ3FdzsqVKzn33HPp3bs3P//5z9m8eTMADz/8MBkZGfTo0YNhw4ZRWFjIY489xvTp08nMzGTp0qVl2vnss89o0qQJaWk/XqJgyJAhzJvnXNLhiy++oFWrVmWev/nmm8nOzqZr167ce++94enp6ence++99OrVi+7du7Nhw4bwc+vXryc3N5eTTjqJhx9+uMyyevfuTdeuXZk5cyYAEydOZO/evWRmZjJixAgmTpzIF198QWZmJuPHj2fXrl30798/vJxQrIWFhXTp0oUbbriBrl278rOf/Yy9e/dWuh2nTJnCVVddxZlnnkmnTp2YNWtWzPsgKyuLaBUVRITc3FwWLFhw+IuqwTqLjfEhVWXnvlJm5xcCMHlQBlMXrGd2fiEjc9JR1bicWlJVxowZw7x58zjqqKN44YUXuOuuu3j88ceZNm0amzZtokmTJuzYsYPWrVszatQoWrRowe23335YW/n5+fTq1avMtJYtW3L88cezdu1a5s2bx+WXX87s2T9efOy+++6jTZs2HDx4kP79+7NmzRp69OgBQFpaGh9++CGPPvooDz74IH/9618B2LBhA4sXL6akpIRTTz2Vm2++meTkZB5//HHatGnD3r17Oe2007jkkkuYNm0aM2bMYPXq1YDzAb927drw4wMHDjB37lxatmzJtm3b6NOnDxdddBEAn3/+Oc899xyzZs3isssu4+WXX+bKK6+sdHuuWbOGZcuWsXv3brKyshg4cCCpqamcc845Ued/9tlnycjIqLTN7Oxsli5dymWXXVbpfJWxRGCMD4kIkwc5HxCz8wvDCWFkTjqTB2XErX/hhx9+YO3at
Zx//vkAHDx4kPbt2wPQo0cPRowYwZAhQxgyZEiVbW3evJmjjjq81M2wYcN4/vnnWbRoEe+8806ZRPDiiy8yc+ZMDhw4wObNm1m/fn04EVx88cUA9O7dm1deeSX8moEDB9KkSROaNGlCu3bt2LJlCx06dODhhx9m7ty5AHz99dd8/vnntG3bttKYVZVJkyaxZMkSGjVqxDfffMOWLVsA6NixI5mZmeEYCgsLq9wGgwcPJiUlhZSUFPr168fy5csZMmRIOPHURLt27fjPf6Jd3jt2lgiM8alQMgglASCuSQCcD8KuXbvywQcfHPbc66+/zpIlS3jttde47777+PjjjyttKyUlheLi4sOmDxo0iPHjx5OdnU3Lli3D0zdt2sSDDz7IihUrOPLII7n22mvLjIZp0qQJAElJSRw4cOCw6ZHP5eXl8fbbb/PBBx/QrFkzcnNzYxpZ88wzz7B161ZWrlxJcnIy6enp4deVX05Vp4bg8I5cEaGkpKRWRwT79u0jJSWlymVXxvoIjPGpUJ9ApMg+g3ho0qQJW7duDSeC0tJS1q1bx6FDh/j666/p168f999/P8XFxezatYvU1FRKSkqittWlS5cyI4RCmjVrxv33389dd91VZvrOnTtp3rw5rVq1YsuWLfzjH/+o8XoUFxdz5JFH0qxZMzZs2MCyZcvCzyUnJ1NaWgpwWPzFxcW0a9eO5ORkFi9ezFdfVVi3LWzGjBnMmDEj6nPz5s1j3759bN++nby8PE477TRSU1PDHfLlb1UlAXD6Xrp161blfJWxRGCMD4WSQKhPYNPvL2BkTjqz8wvjmgwaNWrESy+9xB133EHPnj3JzMzk/fff5+DBg1x55ZV0796drKwsxo4dS+vWrbnwwguZO3du1M7ivn37smrVqqixDRs27LD+g549e5KVlUXnzp254ooryMnJqfF6DBgwgAMHDtClSxcmTpxInz59ws/deOON4dNcbdu2JScnh27dujF+/HhGjBhBQUEB3bt358knn6Rz585VLmvDhg0VnnLq0aMH/fr1o0+fPtxzzz0ce+yxMcX/8MMP06FDB4qKiujRowe/+tWvws8tXryYgQMHxtRORXx3qcrs7Gy1MtSmofvkk0/o0qVLpfNMf+szdu4rDZ8OCiWHlk2TGXf+KXUUafXceuutXHjhhZx33nmJDsUzgwYN4pVXXjlsuO2UKVMq7EivqS1btnDFFVfwzjvvlJke7f9HRFaqana0djzrIxCRx4FBwHeqethxizgny/6McyHqPcC1qvqhV/EY09CMO/+UMqODQn0G9fnXyJMmTeJf//pXosPwVG2HclbHv//9bx566KFat+NlZ/ETwAzgyQqe/wXQyb2dAfzF/WuMiVG0zsf67Oijjw4PvwyaKVOmxL3N0047LS7teNZHoKpLgP9WMstg4El1LANai0h7r+IxxhgTXSI7i48Dvo54XOROO4yI3CgiBSJSsHXr1joJzhhjgsIXo4ZUdaaqZqtqdrQfpBhjjKm5RCaCb4DjIx53cKcZY4ypQ4lMBPOBq8XRByhW1c0JjMeYwKvrMtSR7WdmZrJjx45qx/zEE09wyy23APDYY4/x5JOHj08pLCwM/+iqoKCAsWPHVns5Xgn9MC8jI4OuXbvy5z//Ofzc7bffzrvvvut5DF4OH30OyAXSRKQIuBdIBlDVx4CFOENHN+IMHx3pVSzGmNiEylBDzca95+Xl0aJFC84666yozz/wwAPMnz8//HjcuHFxHVc/atSoKufJzs4mOzvqcPqEaNy4MQ899BC9evWipKSE3r17c/7555ORkcGYMWO44YYb+OlPf+ppDF6OGhququ1VNVlVO6jq31T1MTcJ4I4WGq2qP1HV7qpqvxIzpiZmD3RuHvGyDHU0kd/wwfmBVl5eHgBvvPEGvXr1omfPnvTv3/+w10ZemGblypX07NmTnj178sgjj4TnycvLY9CgQQAsX76cM888k6ysLM466yw+/fTTcAwXX
3wxAwYMoFOnTkyYMKHK7ZSbm8utt95KZmYm3bp1Y/ny5VW+BqB9+/bhX1WnpqbSpUsXvvnGOUt+4oknsn37dr799tuY2qopKzpnjKmQ12Wop0+fztNPPw3AkUceyeLFiyuMZevWrdxwww0sWbKEjh078t//VjY6HUaOHMmMGTPo27cv48ePjzpP586dWbp0KY0bN+btt99m0qRJvPzyywCsXr2aVatW0aRJE0499VTGjBnD8ccfH7WdkD179rB69WqWLFnCddddx9q1a1m8eDHjxo07bN5mzZoddhqtsLCQVatWccYZP/6kqlevXuTn53PJJZdUuuzasERgjF+FjgK+eq/s45Gvx20RXpehrs6poWXLltG3b9/wRdnbtGlT4bw7duxgx44d9O3bF4CrrroqatG64uJirrnmGj7//HNEJFx8DqB///60atUKgIyMDL766qsqE8Hw4cMBp67Szp072bFjB/369YupzPSuXbu45JJL+NOf/lSmCms8ykxXxRKBMaZCdVGGurzGjRtz6NCh8OPqXoi9Ou655x769evH3LlzKSwsJDc3N/xctHLWVYn2S+9YjghKS0u55JJLGDFiRPg6CyHxKDNdFV/8jsAYE8XI153biWc7t9DjOKqLMtTlpaens3r16vAyQufa+/Tpw5IlS9i0aRNApaeGWrduTevWrXnvPedo6Zlnnok6X3FxMccd5/yO9YknnqgyNoCrr766wvP/L7zwAgDvvfcerVq1olWrVuEjgvK3UBJQVa6//nq6dOnCbbfddlib8SgzXRVLBMaYCnldhjrUsRy6FRYWkpOTQ8eOHcnIyGDs2LHhfoWjjjqKmTNncvHFF9OzZ08uv/zySmOfPXs2o0ePJjMzs8Ky3BMmTODOO+8kKysrpm/84FxusqLy0U2bNiUrK4tRo0bxt7/9Lab28vPzeeqpp3j33XfD22HhwoWAk3g3btzo/SgnVfXVrXfv3mpMQ7d+/fpEh+CJsWPH6ltvvZXoMGqsuLhYhw4dGvW5c889V1esWBHX5b3yyit69913V/t10f5/gAKt4HPVjgiMMXVm0qRJ7NmzJ9Fh1FjLli2ZM2dOnS3vwIED/OY3v/F8OdZZbIypMw25DHXotw7xdOmll8a9zWjsiMAYYwLOEoExxgScJQJjjAk4SwTGGBNwlgiMMWG1KUMda3nniiqT1sSqVau4/vrra93O9ddfT8+ePenRowdDhw5l165dAMyYMYPHH3+81u3Xd6IV/NCivsrOztaCAitUahq2Tz75hC5duiQ0hmhlqA8cOEDjxvVnsOGll17K3XffTc+ePWvVzs6dO8P1fW677TbatWvHxIkT2bNnDzk5OaxatSoe4daZaP8/IrJSVaP+Ms2OCIzxuZL9JQx+dTAl+6OXdqita6+9llGjRnHGGWcwYcKECks3R5Z3njJlCtdddx25ubmcdNJJPPzww+H2WrRoEZ4/NzeXoUOH0rlzZ0aMGBH+BfDChQvp3LkzvXv3ZuzYseF2y6x3SQlr1qwJJ4EpU6Zw1VVXceaZZ9KpUydmzZoV8zqGkoCqsnfv3nDNoGbNmpGenh5zSWm/qj+p3RhTI0uKlvBl8ZcsLVrKBSdd4MkyioqKeP/990lKSmLnzp0Vlm6OtGHDBhYvXkxJSQmnnnoqN998M8nJyWXmWbVqFevWrePYY48lJyeH/Px8srOzuemmm8LlpkMVPcsrKCg4rAbPmjVrWLZsGbt37yYrK4uBAweSmprKOeecE7WNZ599loyMDMApW71w4UIyMjJ46KGHwvNkZ2ezdOlSTj/99GptMz+xRGCMT0345wTyivLYf9A5dz/pvUlM+WAKuR1yeeDcB+K6rEsvvZSkpCSg8tLNkQYOHEiTJk1o0qQJ7dq1Y8uWLXTo0KHMPKeffnp4WqjWUIsWLTjppJPC5aaHDx/OzJkzD2s/WlnrwYMHk5KSQkpKCv369WP58uUMGTIkpjLQs2fP5uDBg4wZM4YXX
niBkSOdiya2a9eODRs2VPl6P7NTQ8b41C1Zt9C+eXuSGznfspMbJdO+eXvGZI2J+7KaN28evh8q3bx27Vpee+21CstEx1LGuSalnkNSUlIOW3a0MtAlJSVlCttF3tavX19m/qSkJIYNG1bmCKcuykAnmiUCY3zqhJYnMDpzNKWHSklpnELpoVJGZ47m+JaVXzyltmpSurk6Tj31VL788ksKCwuBH0s7lxetrPW8efPYt28f27dvJy8vj9NOO43U1NSoZaBXr15NRkYGqhpuR1WZP38+nTt3DrdZF2WgE80SgTE+tqhwESmNUxidOZqUxim8Wfim58usSenm6khJSeHRRx9lwIAB9O7dm9TU1PCVwiJ17tyZ4uLiMtc/6NGjB/369aNPnz7cc889FZaLjqSqXHPNNXTv3p3u3buzefNmJk+eHH4+Pz8/fIW2hsqGjxpTD8U6fHTttrUc0/wY0lLS2LZ3G1t2b6FrWtc6iNBbu3btokWLFqgqo0ePplOnTlGv8jV9+nRSU1P51a9+FXW4a22tWrWKP/7xjzz11FNxa7Mu2PBRYwKkW1o30lLSAEhLSWsQSQBg1qxZZGZm0rVrV4qLi7npppuiznfzzTeX6WeIt23btvHb3/7Ws/brCzsiMKYeqg8/KDP+ZUcExhhjqsUSgTHGBJwlAmOMCThLBMYYE3CWCIwxYUEtQx0yduzYcFE8CE4Zaqs1ZIwJa9u2bbguT3XLUGdnZ5OdHXVQShnvv/9+XGIF+N3vfsfdd98dl7YKCgr4/vvvy0y77rrryMnJ4brrrovLMuorOyIwxqc+7Z3NJ527HHb7tHfVH8bVEYQy1AcPHmT8+PE88EDZYn1WhtoYU68d2r27WtNro6GXoZ4xYwYXXXQR7du3P2weK0NdSyIyAPgzkAT8VVWnlXv+BODvQGt3nomqutDLmIwx1deQy1D/5z//Yc6cOeTl5UV9PghlqD1LBCKSBDwCnA8UAStEZL6qRtZ9vRt4UVX/IiIZwEIg3auYjDE1E60M9dy5cyksLCQ3Nzfqa+pTGerKjgg2bdrExo0bOfnkkwHYs2cPJ598crgiaRDKUHt5RHA6sFFVvwQQkeeBwUBkIlCgpXu/FfAfD+MxxsRBXZahTk9Pr7QMdeSVxMApQ33nnXeye/du8vLymDZtWrgMdUUyMjL49ttvw49btGhRprz1Z599Rk5OTu1Wqp7zsrP4OODriMdF7rRIU4ArRaQI52gg6hU1RORGESkQkYKtW7d6EasxJkYNrQx1VawMdW0aFhkKDFDVX7mPrwLOUNVbIua5zY3hIRE5E/gb0E1VD1XUrhWdM0EQS9G5T3tnR+0YbtS8Oaeu9Pd7xMpQ1051i855eWroGyDyUkkd3GmRrgcGAKjqByLSFEgDvvMwLmN8QVUPO+cdye8f9pWZNWsWf//739m/fz9ZWVmVlqGeM2eOZ3H4sQx1Tb7ce3lE0Bj4DOiPkwBWAFeo6rqIef4BvKCqT4hIF+Ad4DitJCg7IjBBsGnTJlJTU2nbtm2lycCYSKrK9u3bKSkpCY+6CknIEYGqHhCRW4BFOENDH1fVdSIyFShQ1fnAb4BZIjIOp+P42sqSgDFB0aFDB4qKirA+MVNdTZs2PWyYblXswjTGGBMAdmEaY4wxFbJEYIwxAWeJwBhjAs4SgTHGBJwlAmOMCThLBMYYE3CWCIwxJuAsERhjTMBZIjDGmIBrkImg/K+l4/HraS/aNMaY+qDBJYLpb33G1AXrwx/UqsrUBeuZ/tZn9apNY4ypLxpUIlBVdu4rZXZ+YfiDe+qC9czOL2TnvtIafYv3ok1jjKlPPL14fV0TESYPygBgdn4hs/MLARiZk87kQRk1KufrRZvGGFOfNKgjAij7wR1S2w9sL9o0xpj6osElgtCpm0iR5/frS5vGGFNfNKhEEHn+fmROOpt+fwEjc9LLnN+vD20aY0x90uD6CFo2TS5z/j50Sqdl0+Qa9xHEu01jjKlPG
uQVyspf9Luqi4DHwos2jTGmrgTuCmXlP6Dj8YHtRZvGGFMfNMhEYIwxJnaWCIwxJuAsERhjTMBZIjDGmICrMhGIyIUiYgnDGGMaqFg+4C8HPheRB0Sks9cBGWOMqVtVJgJVvRLIAr4AnhCRD0TkRhFJ9Tw6Y4wxnovplI+q7gReAp4H2gO/BD4UkTEexmaMMaYOxNJHcJGIzAXygGTgdFX9BdAT+I234RljjPFaLLWGLgGmq+qSyImqukdErvcmLGOMMXUllkQwBdgceiAiKcDRqlqoqu94FZgxxpi6EUsfwRzgUMTjg+60KonIABH5VEQ2isjECua5TETWi8g6EXk2lnaNMcbETyxHBI1VdX/ogaruF5EjqnqRiCQBjwDnA0XAChGZr6rrI+bpBNwJ5Kjq9yLSrtprYIwxplZiOSLYKiIXhR6IyGBgWwyvOx3YqKpfuonkeWBwuXluAB5R1e8BVPW72MI2xhgTL7EcEYwCnhGRGYAAXwNXx/C649x5Q4qAM8rNcwqAiOQDScAUVX2jfEMiciNwI8AJJ5wQw6KNMcbEqspEoKpfAH1EpIX7eFecl98JyAU6AEtEpLuq7igXw0xgJjgXponj8o0xJvBiulSliAwEugJNQxdkUdWpVbzsG+D4iMcd3GmRioB/qWopsElEPsNJDCtiicsYY0ztxfKDssdw6g2NwTk1dClwYgxtrwA6iUhHt3N5GDC/3Dyv4hwNICJpOKeKvowxdmOMMXEQS2fxWap6NfC9qv4vcCbuuf3KqOoB4BZgEfAJ8KKqrhORqRGdz4uA7SKyHlgMjFfV7TVZkcPMHujcjDHGVCqWU0P73L97RORYYDtOvaEqqepCYGG5aZMj7itwm3szxhiTALEkgtdEpDXwB+BDQIFZXgZVK6GjgK/eK/t45OuJiccYY+q5ShOBe0Gad9xRPC+LyAKgqaoW10VwxhhjvFdpIlDVQyLyCM71CFDVH4Af6iKwGgt987cjAWOMiUksncXviMglEho3auLK6Sap+LExxngtlj6Cm3A6cw+IyD6cIaSqqi09jayGPu2dzaHdu3+ccH8XABo1b86pKwsSFFV009/6jJ37Spk8KAMRQVWZumA9LZsmM+78KgdmGWNMXMRyqcpUVW2kqkeoakv3cb1MAkDZJBDD9ERRVXbuK2V2fiFTF6wPJ4HZ+YXs3FdqRwbGmDpT5RGBiPSNNr38hWpM9YgIkwdlADA7v5DZ+YUAjMxJDx8hGGNMXYjl1ND4iPtNcaqKrgR+6klEARJKBqEkAFgSMMbUuVhODV0YcTsf6AZ8731oDV/odFCk0GkiY4ypK7GMGiqvCOgS70CCJrJPYGROOpt+fwEjc9LL9BkYY0xdiKWP4P/i/JoYnMSRifML43qpUfPmUTuGGzVvnoBoKiYitGyaXKZPINRn0LJpsp0eqiVVLbMNyz8OAj9tAz/FCv6LtypS1TdPEbkm4uEBoFBV8z2NqhLZ2dlaUFC/hoHWRkP7h6oPbFiuv7aBn2IF/8UbIiIrVTU72nOxnBp6CXhaVf+uqs8Ay0SkWVwjDLDyH/qWBGrHhuX6axv4KVbwX7yxiuWIYBlwXujKZO6Vyt5U1bPqIL7DNLQjAhN/kW/OkKANy/XTNvBTrOC/eENqe0TQNPLylO59OyIw9VZkf0tIfX+TxpuftoGfYgX/xRuLWBLBbhHpFXogIr2Bvd6FZEzt2LBcf20DP8UK/os3FrEkgl8Dc0RkqYi8B7yAc+UxY+odG5brr23gp1jBf/HGqsrho6q6QkQ6A6e6kz51LzZvAsYPI5y8Hpbr1TaIZ7t+2gZ+G0btZbyJfH/F0lk8GnjGvTgNInIkMFxVH/U+vMMltLM4wNc48NuQOS/eVF5tA6/a9dM28MOXjEjxjrcu3l+17Sy+IZQEAFT1e+CGuERmfMGPQ+biPSzXq23g5bb1yzbwIlavxTPe+vD+iuWI4GOgh7ozikgSsEZVu3oeXRQJOSIofx3kE
892/gboyMCvQ+biyatt4Kdt66dY/aQutmttjwjeAF4Qkf4i0h94DvhHXCIzvtEQh8xVl1fbwE/b1k+x+kmit2ssieAO4F1glHv7GEjxMqh6Z+Trzu3Es51b6HGANMQhc9Xl1Tbw07b1U6x+kujtGksZ6kPAv4BCnGsR/BT4xNuwTG2V/weqzT9UQx0yVx1ebQM/bVs/xRoSz/eBV+rDdq1w+KiInAIMd2/bcH4/gKr28zyq+irORwGHXV/ZVdvrK8d7BEKdDPGr5yOyvNoGZdrdNh55Qph87YJatxtSsr+EKxdeydMXPE3qEam1astvQz39MtKtPmzXyn5HsAFYCgxS1Y1uwOM8jyhAvLi+cuQIBHDOM0Z+26jpMLdx559S5rWhf9b69ub3klfbINzuE/HftkuKlvBl8ZcsLVrKBSddUOv2/PJ/4NX7wCuJ3q4VjhoSkSHAMCAHp8P4eeCvqtqxTiKrQEMqOvdJ54qv79NlQ83PvvlqZIeNyPJkG0z45wTyivLYf3A/B/UgSZLEEUlHkNshlwfOfaCWAfuDr94HdaBGo4ZU9VVVHQZ0BhbjlJpoJyJ/EZGfeRKpiYtEj0AwiXdL1i20b96e5EbJACQ3SqZ98/aMyRqT4Mjqjr0PYhdLZ/FuVX1WVS8EOgCrcEYSmXoq0SMQqsVGZHmyDU5oeQKjM0dTeqiUlMYplB4qZXTmaI5veXycgq7/fPU+SLBqXbNYVb9X1Zmq2t+rgEzt1IcRCKZ+WFS4iJTGKYzOHE1K4xTeLHwz0SHVGXsfVE+VReeMd7y4vnJ9GIFQXaqKRHwDrm8deXUmzkdCI7uN5M4z7iQtJY2BJw1ky+4tcW2/PvPj+yCRqiwxUavGRQYAfwaScDqap1Uw3yU4l8Q8TVUr7QluSJ3FISWzf8GV8i1PX7Gk1kP8QpxRKIOcByNfj9+Ha5yHeX7csxeNfzj88hYHmqTQ/aMPa9d4PR+SGuLFMGKvhiZ71S7gyf7yWzE7L9W2xERNF5oEPAL8AsgAhotIRpT5UoFbcX60FkhL2MuXcoClRUvj1qYfinipatQkAND4h72BOXz3YhixF2162a5X/PA+qA+8PDV0OrBRVb8EEJHngcHA+nLz/Ra4HxjvYSz10oQnziCPveyXQ4AwackdTFkykdyOA2o3xK/8cMR4fNPyoM2q3pQ1ftN6sf7GO7a/Es6zIwLgOODriMdF7rQwcS6BebyqVrrHReRGESkQkYKtW7fGP9IEuUVb0Z4kkt1vvslAe5ICNcTPGJN4CessFpFGwB+Ba6uaV1VnAjPB6SPwNrK6c8LINxld+CYT8n5Digr7GzVidN/7az/EL/RNKp7frLxo0yt+itXY/qoHvDwi+AaI/ETr4E4LSQW6AXkiUgj0AeaLSNTOjIZqUeEiUhBGa6tADvGrzfPGmPjw8ohgBdBJRDriJIBhwBWhJ1W1GEgLPRaRPOD2qkYNJVI8C3iFlBnit3dbfIf4efDNqmTE88422F8SlyJmB5qkVDhqqNYdez4pEujFMGIv2vSyXYjv/1YZdqRRJc8SgaoeEJFbgEU4w0cfV9V1IjIVKFDV+V4t2yvxLuAF0C2tW/h+WkoaaSlplcydePHeBt0/+tA3Q/y8GjFT62GXddSml+2CN+8vExtPf0fghUT8jsAKeNk2AO+KBAadZ/9bVtCwjIT8jqAhsQJetg2Md+x/K/EsEcTACnjZNjDe8ex/ywoaxswSQYyCXMArxLaB8Yr9byWW9RHEaO22tRzT/BjSUtLY5o7u6ZrWtc7jSKSgbwNP6+wEXND/t+pCZX0Elghi4NcPAC+Gu/qB1/vLiyKBQeb5/orz+8DLeL3837LO4lryW6GtkMjheEHi9f7yokhgkHm+v+L8PvAy3kT9b9kRQQz8Nmww6EM9vdpf4SKBHOKgCEkKRyC1LxIYcJ7tL4/eB17EWxf/W3ZEEDA2HM8bViTQX/z0Pkj0/5YlggbIhnp644SRbzI6935Kp
REpKpQ2SmJ0bhyKBBpP+Ol9kOj/LUsEDZQNx/NGkIsE+pGf3geJ/N+yPoIY+G3UkJ8ufegFL2O1YY7x57fLavr1/VVZH4FdvD4G9e2Drip+uvShF7zcX34rEugHXu0vz4sExrGqaaLfX5YIjDGmOhrgpTWtj8AYYwLOjgiMMaY6GuClNe2IwBhjAs6OCBogP1360BiveP4/G8cjgUS/v2z4qDHGBICVmDDGGFMhSwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4CwRGGNMwHmaCERkgIh8KiIbRWRilOdvE5H1IrJGRN4RkRO9jMcYY8zhPEsEIpIEPAL8AsgAhotIRrnZVgHZqtoDeAl4wKt4jDHGROflEcHpwEZV/VJV9wPPA4MjZ1DVxaq6x324DOjgYTzGGGOi8DIRHAd8HfG4yJ1WkeuBf0R7QkRuFJECESnYunVrHEM0xhhTLzqLReRKIBv4Q7TnVXWmqmaravZRRx1Vt8EZY0wD5+WlKr8Bjo943MGdVoaInAfcBZyrqj94GI8xxpgovDwiWAF0EpGOInIEMAyYHzmDiGQB/w+4SFW/8zAWY4wxFfAsEajqAeAWYBHwCfCiqq4TkakicpE72x+AFsAcEVktIvMraM4YY4xHvDw1hKouBBaWmzY54v55Xi7fGGNM1epFZ7ExxpjEsURgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4CwRGGNMwFkiMMaYgLNEYIwxAWeJwBhjAs4SgTHGBJwlAmOMCThLBMYYE3CWCIwxJuAsERhjTMBZIjDGmICzRGCMMQFnicAYYwLOEoExxgScJQJjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4DxNBCIyQEQ+FZGNIjIxyvNNROQF9/l/iUi6l/EYY4w5nGeJQESSgEeAXwAZwHARySg32/XA96p6MjAduN+reIwxxkTn5RHB6cBGVf1SVfcDzwODy80zGPi7e/8loL+IiIcxGWOMKaexh20fB3wd8bgIOKOieVT1gIgUA22BbZEziciNwI3uwx9EZK0nESdWGuXWu4Gw9fIXWy9/qc56nVjRE14mgrhR1ZnATAARKVDV7ASHFHe2Xv5i6+Uvtl6V8/LU0DfA8RGPO7jTos4jIo2BVsB2D2MyxhhTjpeJYAXQSUQ6isgRwDBgfrl55gPXuPeHAu+qqnoYkzHGmHI8OzXknvO/BVgEJAGPq+o6EZkKFKjqfOBvwFMishH4L06yqMpMr2JOMFsvf7H18hdbr0qIfQE3xphgs18WG2NMwFkiMMaYgPNVIqiqZIVfiUihiHwsIqtFpCDR8dSUiDwuIt9F/s5DRNqIyFsi8rn798hExlgTFazXFBH5xt1nq0XkgkTGWBMicryILBaR9SKyTkRudaf7dp9Vsk4NYX81FZHlIvKRu27/607v6Jbo2eiW7Dmi2m37pY/ALVnxGXA+zo/TVgDDVXV9QgOLAxEpBLJV1dc/eBGRvsAu4ElV7eZOewD4r6pOc5P3kap6RyLjrK4K1msKsEtVH0xkbLUhIu2B9qr6oYikAiuBIcC1+HSfVbJOl+H//SVAc1XdJSLJwHvArcBtwCuq+ryIPAZ8pKp/qU7bfjoiiKVkhUkgVV2CM/orUmQZkb/jvCl9pYL18j1V3ayqH7r3S4BPcH7t79t9Vsk6+Z46drkPk92bAj/FKdEDNdxffkoE0UpWNIgdjLMz3xSRlW45jYbkaFXd7N7/Fjg6kcHE2S0issY9deSb0yfRuJV/s4B/0UD2Wbl1ggawv0QkSURWA98BbwFfADtU9YA7S40+F/2UCBqys1W1F06l1tHuqYgGx/2xoD/ORVbtL8BPgExgM/BQQqOpBRFpAbwM/FpVd0Y+59d9FmWdGsT+U
tWDqpqJU6nhdKBzPNr1UyKIpWSFL6nqN+7f74C5ODu4odjinrcNnb/9LsHxxIWqbnHflIeAWfh0n7nnml8GnlHVV9zJvt5n0dapoeyvEFXdASwGzgRauyV6oIafi35KBLGUrPAdEWnudmohIs2BnwENqbpqZBmRa4B5CYwlbkIflK5f4sN95nY+/g34RFX/GPGUb/dZRevUQPbXUSLS2r2fgjNw5hOchDDUna1G+8s3o4YA3CFff+LHkhX3JTai2hORk3COAsAp+fGsX9dLRJ4DcnFK424B7gVeBV4ETgC+Ai5TVV91vFawXrk4pxkUKARuijiv7gsicjawFPgYOOROnoRzTt2X+6ySdRqO//dXD5zO4CScL/EvqupU9zPkeaANsAq4UlV/qFbbfkoExhhj4s9Pp4aMMcZ4wBKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4CwRmMBwq1L+vNy0X4tIhQW6RCRPRDy96LmIPOeWPhhXbvoTIjK0otcZEy+eXarSmHroOZwfIi6KmDYMmJCYcEBEjgFOU9WTExWDMXZEYILkJWBgqF67W5TsWGCpiPxFRAoi67yXJyK7Iu4PFZEn3PtHicjLIrLCveVEeW1TEZktznUnVolIP/epN4Hj3Br551QUuIj81j1CSKrhuhtTITsiMIGhqv8VkeU4xf3m4RwNvKiqKiJ3uc8nAe+ISA9VXRNj038GpqvqeyJyAs4RR5dy84x2QtDuItIZp9rsKcBFwAK3kFhUIvIHIBUYqfYLUOMBOyIwQRM6PYT79zn3/mUi8iHOT/S7AhnVaPM8YIZbHng+0NKtfhnpbOBpAFXdgFO64ZQY2r4HaKWqoywJGK/YEYEJmnnAdBHpBTRT1ZUi0hG4Hedc/ffuKZ+mUV4b+UEc+XwjoI+q7vMg3hVAbxFp45d6P8Z/7IjABIp7hafFwOP8eDTQEtgNFIvI0TinjqLZIiJdRKQRTgXLkDeBMaEHIpIZ5bVLgRHu86fgFHT7NIaQ3wCmAa+HqtQaE2+WCEwQPQf0dP+iqh/hnBLaADwL5FfwuonAAuB9nIubhIwFst0hoOuBUVFe+yjQSEQ+Bl4Aro21QqSqzsGpoT/fLT9sTFxZ9VFjjAk4OyIwxpiAs0RgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4P4/J7yZq61p3k8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# let's plot the accuracy on the training set\n", + "import matplotlib.pyplot as plt\n", + "plt.scatter(k_values,accuracy_training_k_p[0],marker=\"x\")\n", + "plt.scatter(k_values,accuracy_training_k_p[1],marker=\"x\")\n", + "plt.scatter(k_values,accuracy_training_k_p[2],marker=\"*\")\n", + "plt.scatter(k_values,accuracy_training_k_p[3],marker=\"s\")\n", + "plt.xlim([0, max(k_values)+2])\n", + "plt.ylim([0.0, 1.1])\n", + "plt.xlabel(\"Value of k\")\n", + "plt.ylabel(\"Accuracy\")\n", + "legend_labels = [\"Training (Manhattan, p=1)\",\"Training (Euclidian, p=2)\",\"Training (p=3)\",\"Training (p=4)\"]\n", + "plt.legend(labels=legend_labels, loc=1, borderpad=0.2)\n", + "plt.title(\"Effect of k and p on training set accuracy\", fontsize=10)\n", + "plt.show()\n", + "\n", + "# let's plot the accuracy on the test set\n", + "import matplotlib.pyplot as plt\n", + "plt.scatter(k_values,accuracy_test_k_p[0],marker=\"x\")\n", + "plt.scatter(k_values,accuracy_test_k_p[1],marker=\"+\")\n", + "plt.scatter(k_values,accuracy_test_k_p[2],marker=\"*\")\n", + "plt.scatter(k_values,accuracy_test_k_p[3],marker=\"s\")\n", + "plt.xlim([0, max(k_values)+2])\n", + "plt.ylim([0.0, 1.1])\n", + "plt.xlabel(\"Value of k\")\n", + "plt.ylabel(\"Accuracy\")\n", + "legend_labels = [\"Test (Manhattan, p=1)\",\"Test (Euclidian, p=2)\",\"Training (p=3)\",\"Training (p=4)\"]\n", + "plt.legend(labels=legend_labels, loc=1, borderpad=0.2)\n", + "plt.title(\"Effect of k and p on test set accuracy\", fontsize=10)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "w = uniform ,p = 1 training [1.0, 0.8306451612903226, 0.8064516129032258, 0.75, 0.75, 0.717741935483871, 0.7016129032258065, 0.6854838709677419, 0.6451612903225806, 0.6854838709677419, 
0.6451612903225806, 0.5806451612903226, 0.5564516129032258, 0.5645161290322581, 0.5645161290322581] \n", + "\n", + "w = uniform ,p = 1 test [0.7, 0.7, 0.5333333333333333, 0.43333333333333335, 0.4666666666666667, 0.4, 0.5, 0.4666666666666667, 0.4666666666666667, 0.5, 0.4666666666666667, 0.5, 0.43333333333333335, 0.5, 0.4666666666666667] \n", + "\n", + "w = uniform ,p = 2 training [1.0, 0.8306451612903226, 0.7580645161290323, 0.6854838709677419, 0.6370967741935484, 0.6048387096774194, 0.5806451612903226, 0.5967741935483871, 0.5645161290322581, 0.5241935483870968, 0.5161290322580645, 0.46774193548387094, 0.46774193548387094, 0.46774193548387094, 0.43548387096774194] \n", + "\n", + "w = uniform ,p = 2 test [0.5666666666666667, 0.4666666666666667, 0.4, 0.4, 0.4, 0.43333333333333335, 0.4, 0.3333333333333333, 0.43333333333333335, 0.4, 0.4, 0.4, 0.36666666666666664, 0.3, 0.3333333333333333] \n", + "\n", + "w = uniform ,p = 3 training [1.0, 0.8064516129032258, 0.7096774193548387, 0.6774193548387096, 0.6290322580645161, 0.6129032258064516, 0.5483870967741935, 0.5403225806451613, 0.5161290322580645, 0.49193548387096775, 0.4596774193548387, 0.43548387096774194, 0.4435483870967742, 0.4435483870967742, 0.41935483870967744] \n", + "\n", + "w = uniform ,p = 3 test [0.5333333333333333, 0.36666666666666664, 0.4, 0.3333333333333333, 0.36666666666666664, 0.4, 0.3333333333333333, 0.3333333333333333, 0.4, 0.43333333333333335, 0.4, 0.36666666666666664, 0.3333333333333333, 0.3333333333333333, 0.3333333333333333] \n", + "\n", + "w = uniform ,p = 4 training [1.0, 0.7983870967741935, 0.6774193548387096, 0.6532258064516129, 0.6370967741935484, 0.5887096774193549, 0.5403225806451613, 0.5080645161290323, 0.5080645161290323, 0.47580645161290325, 0.45161290322580644, 0.41935483870967744, 0.4435483870967742, 0.41935483870967744, 0.4274193548387097] \n", + "\n", + "w = uniform ,p = 4 test [0.5333333333333333, 0.3333333333333333, 0.43333333333333335, 0.3, 0.3, 0.4, 0.3333333333333333, 
0.36666666666666664, 0.4, 0.4, 0.4, 0.3333333333333333, 0.3, 0.3333333333333333, 0.3] \n", + "\n", + "w = distance ,p = 1 training [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] \n", + "\n", + "w = distance ,p = 1 test [0.7, 0.7333333333333333, 0.6333333333333333, 0.5, 0.5333333333333333, 0.43333333333333335, 0.4666666666666667, 0.4666666666666667, 0.4666666666666667, 0.5333333333333333, 0.4666666666666667, 0.43333333333333335, 0.4666666666666667, 0.5, 0.4666666666666667] \n", + "\n", + "w = distance ,p = 2 training [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] \n", + "\n", + "w = distance ,p = 2 test [0.5666666666666667, 0.5333333333333333, 0.5, 0.4666666666666667, 0.36666666666666664, 0.4, 0.4, 0.43333333333333335, 0.4, 0.43333333333333335, 0.43333333333333335, 0.4, 0.43333333333333335, 0.4, 0.36666666666666664] \n", + "\n", + "w = distance ,p = 3 training [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] \n", + "\n", + "w = distance ,p = 3 test [0.5333333333333333, 0.4666666666666667, 0.4666666666666667, 0.3333333333333333, 0.36666666666666664, 0.3333333333333333, 0.36666666666666664, 0.4, 0.4, 0.4, 0.36666666666666664, 0.4, 0.4, 0.4, 0.4] \n", + "\n", + "w = distance ,p = 4 training [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] \n", + "\n", + "w = distance ,p = 4 test [0.5333333333333333, 0.43333333333333335, 0.43333333333333335, 0.3, 0.3333333333333333, 0.36666666666666664, 0.36666666666666664, 0.4, 0.4, 0.36666666666666664, 0.36666666666666664, 0.36666666666666664, 0.36666666666666664, 0.4, 0.3333333333333333] \n", + "\n" + ] + } + ], + "source": [ + "# Now let's explore the impact of using a different weighting scheme\n", + "w_values = [\"uniform\",\"distance\"]\n", + "accuracy_training_k_p_w = []\n", + "accuracy_test_k_p_w = []\n", + "\n", + "for i in range(len(w_values)):\n", + " accuracy_training_k_p_w.append([])\n", + " 
accuracy_test_k_p_w.append([])\n", + " \n", + " for j in range(len(p_values)):\n", + " accuracy_training_k_p_w[i].append([])\n", + " accuracy_test_k_p_w[i].append([]) \n", + "\n", + " for k in k_values:\n", + " model_k_p_w = neighbors.KNeighborsClassifier(n_neighbors=k, p=p_values[j], weights=w_values[i])\n", + " model_k_p_w.fit(X_training, y_training)\n", + "\n", + " # compute the predictions for the training and test sets\n", + " predictions_training_k_p_w = model_k_p_w.predict(X_training)\n", + " predictions_test_k_p_w = model_k_p_w.predict(X_test)\n", + "\n", + " # compute the accuracy on the training and test set predictions\n", + " accuracy_training_k_p_w[i][j].append(metrics.accuracy_score(y_training, predictions_training_k_p_w))\n", + " accuracy_test_k_p_w[i][j].append(metrics.accuracy_score(y_test, predictions_test_k_p_w))\n", + "\n", + " print(\"w =\",w_values[i],\",p =\",p_values[j],\"training\",accuracy_training_k_p_w[i][j],\"\\n\")\n", + " print(\"w =\",w_values[i],\",p =\",p_values[j],\"test\",accuracy_test_k_p_w[i][j],\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# let's plot the accuracy on the training set\n", + "import matplotlib.pyplot as plt\n", + "plt.scatter(k_values,accuracy_training_k_p_w[0][0],marker=\"x\")\n", + "plt.scatter(k_values,accuracy_training_k_p_w[0][1],marker=\"+\")\n", + "plt.scatter(k_values,accuracy_training_k_p_w[1][0],marker=\"*\")\n", + "plt.scatter(k_values,accuracy_training_k_p_w[1][1],marker=\"s\")\n", + "plt.xlim([0, max(k_values)+2])\n", + "plt.ylim([0.0, 1.1])\n", + "plt.xlabel(\"Value of k\")\n", + "plt.ylabel(\"Accuracy\")\n", + "legend_labels = [\"Training (Manhattan dist.,w=uniform)\",\"Training (Euclidian dist.,w=uniform)\",\n", + " \"Training (Manhattan dist.,w=distance)\",\"Training (Euclidian dist.,w=distance)\"]\n", + "plt.legend(labels=legend_labels, loc=4, borderpad=0.2)\n", + "plt.title(\"Effect of k and p and w on training set accuracy\", fontsize=10)\n", + "plt.show()\n", + "\n", + "# let's plot the accuracy on the test set\n", + "import matplotlib.pyplot as plt\n", + "plt.scatter(k_values,accuracy_test_k_p_w[0][0],marker=\"x\")\n", + "plt.scatter(k_values,accuracy_test_k_p_w[0][1],marker=\"+\")\n", + "plt.scatter(k_values,accuracy_test_k_p_w[1][0],marker=\"*\")\n", + "plt.scatter(k_values,accuracy_test_k_p_w[1][1],marker=\"s\")\n", + "plt.xlim([0, max(k_values)+2])\n", + "plt.ylim([0.0, 1.1])\n", + "plt.xlabel(\"Value of k\")\n", + "plt.ylabel(\"Accuracy\")\n", + "legend_labels = [\"Test (Manhattan dist.,w=uniform)\",\"Test (Euclidian dist.,w=uniform)\",\n", + " \"Training (Manhattan dist.,w=distance)\",\"Training (Euclidian dist.,w=distance)\"]\n", + "plt.legend(labels=legend_labels, loc=1, borderpad=0.2)\n", + "plt.title(\"Effect of k and p and w on test set accuracy\", fontsize=10)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "Max test set accuracy: 0.7333333333333333\n", + "Index of max test set accuracy: (1, 0, 1)\n", + "Hyperparameter values: w = distance p = 1 k = 3\n" + ] + } + ], + "source": [ + "# let's find the best test set accuracy, using numpy\n", + "import numpy as np\n", + "np_array = np.array(accuracy_test_k_p_w)\n", + "max_index = np.unravel_index(np_array.argmax(), np_array.shape)\n", + "print(\"Max test set accuracy:\",np_array.max())\n", + "print(\"Index of max test set accuracy:\",max_index)\n", + "print(\"Hyperparameter values: w =\",w_values[max_index[0]],\"p =\",p_values[max_index[1]],\"k =\",k_values[max_index[2]]) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Key take away points from this worked example:\n", + "* Distance weighting seems to work better than uniform weighting on the beer dataset\n", + "* Manhattan distance seems to work best of the 4 distance metrics tested on the beer dataset\n", + "* Lower values of k seem to work better thna higher values on the beer dataset\n", + "* It is important to look at accuracy on both the training and test sets when deciding on model parameters\n", + "* Training set accuracy is usually much higher than test set accuracy\n", + "* The best test set accuracy we found on the beer dataset was with distance weighting, Manhattan distance and k=3. 
This combination also achieves 100% training set accuracy\n", + "* We have manually explored hyperparameter values in this example, however scikit-learn provides a class called GridSearchCV which can automate the hyperparameter search process (we will cover this in a later lecture) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/weather.csv b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/weather.csv new file mode 100644 index 00000000..38041c7d --- /dev/null +++ b/year4/semester1/CT4101: Machine Learning/materials/topic3/examples/weather.csv @@ -0,0 +1,15 @@ +Play?,Outlook,Temp,Humidity,Windy +no,sunny,hot,high,false +no,sunny,hot,high,true +yes,overcast,hot,high,false +yes,rainy,mild,high,false +yes,rainy,cool,normal,false +no,rainy,cool,normal,true +yes,overcast,cool,normal,true +no,sunny,mild,high,false +yes,sunny,cool,normal,false +yes,rainy,mild,normal,false +yes,sunny,mild,normal,true +yes,overcast,mild,high,true +yes,overcast,hot,normal,false +no,rainy,mild,high,true