[CT4101]: Add Assignment 1 code

2024-10-20 23:38:35 +01:00
parent f34914d83b
commit dd8b8a7bb2
3 changed files with 651 additions and 0 deletions
--- a/Learning/assignments/assignment1/code/CT4101_A1_Hayes_Andrew_code.zip
+++ b/Learning/assignments/assignment1/code/CT4101_A1_Hayes_Andrew_code.zip
--- a/Learning/assignments/assignment1/code/assignment.ipynb
+++ b/Learning/assignments/assignment1/code/assignment.ipynb
--- a/Learning/assignments/assignment1/code/assignment.py
+++ b/Learning/assignments/assignment1/code/assignment.py
@ -0,0 +1,187 @@
+# -*- coding: utf-8 -*-
+"""assignment.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1ILT6kccc8NHrY7xonz-ERej4MkGAw1Qv
+
+Set up
+"""
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, classification_report
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+
+train_data = pd.read_csv('/content/drive/MyDrive/wildfires_training.csv')
+test_data = pd.read_csv('/content/drive/MyDrive/wildfires_test.csv')
+
+X_train = train_data.drop(columns=['fire'])
+y_train = train_data['fire']
+
+X_test = test_data.drop(columns=['fire'])
+y_test = test_data['fire']
+
+"""RandomForestClassifier with default parameters:"""
+
+# intialise the randomforestclassifier with a set random seed
+rfc = RandomForestClassifier(random_state=0)
+rfc.fit(X_train, y_train)
+
+# train and get accuracy
+train_predictions = rfc.predict(X_train)
+train_accuracy = accuracy_score(y_train, train_predictions)
+train_report = classification_report(y_train, train_predictions)
+
+print(f"Training Accuracy: {train_accuracy:.4f}\n")
+print("Classification Report of Testing Results:")
+print(train_report)
+
+# test and get accuracy
+test_predictions = rfc.predict(X_test)
+test_accuracy = accuracy_score(y_test, test_predictions)
+test_report = classification_report(y_test, test_predictions)
+
+print(f"Testing Accuracy: {test_accuracy:.4f}")
+print("Classification Report of Testing Results:")
+print(test_report)
+
+# create a confusion matrix to visualise the data
+cm = confusion_matrix(y_test, test_predictions)
+plt.figure(figsize=(8, 6))
+sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+            xticklabels=np.unique(y_test), yticklabels=np.unique(y_test))
+plt.ylabel('Actual')
+plt.xlabel('Predicted')
+plt.title('Confusion Matrix')
+plt.show()
+
+"""RandomForestClassifier with tuning:"""
+
+# initialise a range of hyperparameters to loop over
+n_estimators_range = [1, 3, 5, 10, 50, 100, 250, 500, 1000]
+max_depth_range = [1, 5, 10, 20, 30, None]
+
+# matrix to store the accuracy of each hyperparameter pair
+accuracy_matrix = np.zeros((len(max_depth_range), len(n_estimators_range)))
+
+# variable to track
+best_accuracy = 0
+best_n_estimators = None
+best_max_depth = None
+
+# looping over each hyperparam value
+for i, max_depth in enumerate(max_depth_range):
+    for j, n_estimators in enumerate(n_estimators_range):
+        rfc = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=0)
+        rfc.fit(X_train, y_train)
+
+        y_pred = rfc.predict(X_test)
+        accuracy = accuracy_score(y_test, y_pred)
+        accuracy_matrix[i, j] = accuracy
+
+        if accuracy > best_accuracy:
+            best_accuracy = accuracy
+            best_n_estimators = n_estimators
+            best_max_depth = max_depth
+
+# heatmap of accuracies
+plt.figure(figsize=(10, 6))
+sns.heatmap(accuracy_matrix, annot=True, fmt=".3f", cmap="YlGnBu",
+            xticklabels=n_estimators_range,
+            yticklabels=[str(depth) if depth is not None else "None" for depth in max_depth_range])
+plt.title('Accuracy for different n_estimators and max_depth values')
+plt.xlabel('n_estimators')
+plt.ylabel('max_depth')
+plt.show()
+
+print(f"Best Accuracy: {best_accuracy:.3f}")
+print(f"Best n_estimators: {best_n_estimators}")
+print(f"Best max_depth: {best_max_depth}")
+
+"""SVC classifier with default params on the unprocessed data"""
+
+# svc with default params
+svc = SVC()
+svc.fit(X_train, y_train)
+
+# get training accuracy
+svc_train_predictions = svc.predict(X_train)
+svc_train_accuracy = accuracy_score(y_train, svc_train_predictions)
+svc_train_report = classification_report(y_train, svc_train_predictions)
+
+print(f"Training Accuracy: {svc_train_accuracy:.4f}\n")
+print("Classification Report of Training Results:")
+print(svc_train_report)
+
+# get testing accuracy
+svc_test_predictions = svc.predict(X_test)
+svc_test_accuracy = accuracy_score(y_test, svc_test_predictions)
+svc_test_report = classification_report(y_test, svc_test_predictions)
+
+print(f"Testing Accuracy: {svc_test_accuracy:.4f}")
+print("Classification Report of Testing Results:")
+print(svc_test_report)
+
+# confusino matrix of the testing accuracy
+cm = confusion_matrix(y_test, svc_test_predictions)
+plt.figure(figsize=(8, 6))
+sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+            xticklabels=['No', 'Yes'],
+            yticklabels=['No', 'Yes'])
+plt.xlabel('Predicted Label')
+plt.ylabel('True Label')
+plt.title('Confusion Matrix for Testing Results')
+plt.show()
+
+"""SVC with hyperparameter tuning"""
+
+# initialise a range of hyperparameters to loop over
+C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
+kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']
+
+# matrix to store the accuracy of each hyperparameter pair
+accuracy_matrix = np.zeros((len(kernel_types), len(C_values)))
+
+# variables to track the best accuracy and corresponding hyperparameters
+best_accuracy = 0
+best_C = None
+best_kernel = None
+
+# looping over each hyperparameter value
+for i, kernel in enumerate(kernel_types):
+    for j, C in enumerate(C_values):
+        svc = SVC(C=C, kernel=kernel)
+        svc.fit(X_train, y_train)
+
+        svc_test_predictions = svc.predict(X_test)
+        accuracy = accuracy_score(y_test, svc_test_predictions)
+        accuracy_matrix[i, j] = accuracy
+
+        if accuracy > best_accuracy:
+            best_accuracy = accuracy
+            best_C = C
+            best_kernel = kernel
+
+# heatmap of accuracies
+plt.figure(figsize=(10, 6))
+sns.heatmap(accuracy_matrix, annot=True, fmt=".2f", cmap='Blues',
+            xticklabels=[f"{C:.3f}" for C in C_values],
+            yticklabels=kernel_types)
+plt.title('Accuracy for different C values and kernel types')
+plt.xlabel('C Value')
+plt.ylabel('Kernel Type')
+plt.show()
+
+print(f"Best Accuracy: {best_accuracy:.3f}")
+print(f"Best C: {best_C:.3f}")
+print(f"Best Kernel: {best_kernel}")