[CT4101]: Add Assignment 1 code

This commit is contained in:
2024-10-20 23:38:35 +01:00
parent f34914d83b
commit dd8b8a7bb2
3 changed files with 651 additions and 0 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,187 @@
# -*- coding: utf-8 -*-
"""assignment.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ILT6kccc8NHrY7xonz-ERej4MkGAw1Qv
Set up
"""
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Read the wildfire training and test splits from the hard-coded Colab
# Drive paths.
train_data = pd.read_csv('/content/drive/MyDrive/wildfires_training.csv')
test_data = pd.read_csv('/content/drive/MyDrive/wildfires_test.csv')

# The 'fire' column is the classification target; every remaining column
# is kept as an input feature.
TARGET_COLUMN = 'fire'
y_train, y_test = train_data[TARGET_COLUMN], test_data[TARGET_COLUMN]
X_train = train_data.drop(columns=[TARGET_COLUMN])
X_test = test_data.drop(columns=[TARGET_COLUMN])
"""RandomForestClassifier with default parameters:"""
# Initialise the random forest with a fixed seed so repeated runs build
# the same ensemble, then fit it on the training split.
rfc = RandomForestClassifier(random_state=0)
rfc.fit(X_train, y_train)

# Score the model on the same data it was fitted on.
train_predictions = rfc.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_report = classification_report(y_train, train_predictions)
print(f"Training Accuracy: {train_accuracy:.4f}\n")
# The heading names the training split because the report below is built
# from the training predictions.
print("Classification Report of Training Results:")
print(train_report)

# Score the model on the held-out test split.
test_predictions = rfc.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_report = classification_report(y_test, test_predictions)
print(f"Testing Accuracy: {test_accuracy:.4f}")
print("Classification Report of Testing Results:")
print(test_report)

# Plot the test-set confusion matrix; both axes are labelled with the
# distinct class values found in y_test.
cm = confusion_matrix(y_test, test_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=np.unique(y_test), yticklabels=np.unique(y_test))
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()
"""RandomForestClassifier with tuning:"""
# Candidate values for the two hyperparameters explored by the search.
n_estimators_range = [1, 3, 5, 10, 50, 100, 250, 500, 1000]
max_depth_range = [1, 5, 10, 20, 30, None]

# accuracy_matrix[i, j] stores the test accuracy obtained with
# max_depth_range[i] (rows) and n_estimators_range[j] (columns).
accuracy_matrix = np.zeros((len(max_depth_range), len(n_estimators_range)))

# Running record of the best-scoring combination seen so far.
best_accuracy, best_n_estimators, best_max_depth = 0, None, None

# Fit one forest per (max_depth, n_estimators) pair and score it.
# NOTE(review): each candidate is scored on X_test / y_test, so the "best"
# hyperparameters are selected using the test split itself.
for i, max_depth in enumerate(max_depth_range):
    for j, n_estimators in enumerate(n_estimators_range):
        rfc = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=0,
        )
        rfc.fit(X_train, y_train)
        y_pred = rfc.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_matrix[i, j] = accuracy
        # Strict '>' means ties keep the earliest combination found.
        if accuracy > best_accuracy:
            best_accuracy, best_n_estimators, best_max_depth = (
                accuracy, n_estimators, max_depth
            )

# Heatmap of the accuracy grid; str() renders the None depth as "None".
plt.figure(figsize=(10, 6))
sns.heatmap(
    accuracy_matrix,
    annot=True,
    fmt=".3f",
    cmap="YlGnBu",
    xticklabels=n_estimators_range,
    yticklabels=[str(depth) for depth in max_depth_range],
)
plt.title('Accuracy for different n_estimators and max_depth values')
plt.xlabel('n_estimators')
plt.ylabel('max_depth')
plt.show()

# Report the winning combination.
print(f"Best Accuracy: {best_accuracy:.3f}")
print(f"Best n_estimators: {best_n_estimators}")
print(f"Best max_depth: {best_max_depth}")
"""SVC classifier with default params on the unprocessed data"""
# Fit a support vector classifier with scikit-learn's default settings
# on the features exactly as loaded (no scaling is applied here).
svc = SVC()
svc.fit(X_train, y_train)

# Score the model on the data it was fitted on.
svc_train_predictions = svc.predict(X_train)
svc_train_report = classification_report(y_train, svc_train_predictions)
svc_train_accuracy = accuracy_score(y_train, svc_train_predictions)
print(f"Training Accuracy: {svc_train_accuracy:.4f}\n")
print("Classification Report of Training Results:")
print(svc_train_report)

# Score the model on the held-out test split.
svc_test_predictions = svc.predict(X_test)
svc_test_report = classification_report(y_test, svc_test_predictions)
svc_test_accuracy = accuracy_score(y_test, svc_test_predictions)
print(f"Testing Accuracy: {svc_test_accuracy:.4f}")
print("Classification Report of Testing Results:")
print(svc_test_report)

# Confusion matrix of the test predictions.
# NOTE(review): the tick labels are hard-coded as 'No'/'Yes' rather than
# taken from y_test -- confirm they match the order of the class labels.
cm = confusion_matrix(y_test, svc_test_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['No', 'Yes'],
    yticklabels=['No', 'Yes'],
)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix for Testing Results')
plt.show()
"""SVC with hyperparameter tuning"""
# Candidate regularisation strengths and kernels explored by the search.
C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']

# accuracy_matrix[i, j] stores the test accuracy obtained with
# kernel_types[i] (rows) and C_values[j] (columns).
accuracy_matrix = np.zeros((len(kernel_types), len(C_values)))

# Running record of the best-scoring combination seen so far.
best_accuracy, best_C, best_kernel = 0, None, None

# Fit one SVC per (kernel, C) pair and score it.
# NOTE(review): each candidate is scored on X_test / y_test, so the "best"
# hyperparameters are selected using the test split itself.
for i, kernel in enumerate(kernel_types):
    for j, C in enumerate(C_values):
        svc = SVC(C=C, kernel=kernel)
        svc.fit(X_train, y_train)
        svc_test_predictions = svc.predict(X_test)
        accuracy = accuracy_score(y_test, svc_test_predictions)
        accuracy_matrix[i, j] = accuracy
        # Strict '>' means ties keep the earliest combination found.
        if accuracy > best_accuracy:
            best_accuracy, best_C, best_kernel = accuracy, C, kernel

# Heatmap of the accuracy grid, with C rendered to three decimals.
plt.figure(figsize=(10, 6))
sns.heatmap(
    accuracy_matrix,
    annot=True,
    fmt=".2f",
    cmap='Blues',
    xticklabels=[f"{C:.3f}" for C in C_values],
    yticklabels=kernel_types,
)
plt.title('Accuracy for different C values and kernel types')
plt.xlabel('C Value')
plt.ylabel('Kernel Type')
plt.show()

# Report the winning combination.
print(f"Best Accuracy: {best_accuracy:.3f}")
print(f"Best C: {best_C:.3f}")
print(f"Best Kernel: {best_kernel}")