# -*- coding: utf-8 -*-
"""assignment.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ILT6kccc8NHrY7xonz-ERej4MkGAw1Qv

Set up
"""

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

train_data = pd.read_csv('/content/drive/MyDrive/wildfires_training.csv')
test_data = pd.read_csv('/content/drive/MyDrive/wildfires_test.csv')

X_train = train_data.drop(columns=['fire'])
y_train = train_data['fire']
X_test = test_data.drop(columns=['fire'])
y_test = test_data['fire']

"""RandomForestClassifier with default parameters:"""

# initialise the RandomForestClassifier with a set random seed
rfc = RandomForestClassifier(random_state=0)
rfc.fit(X_train, y_train)

# predict on the training set and report accuracy
train_predictions = rfc.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)
train_report = classification_report(y_train, train_predictions)
print(f"Training Accuracy: {train_accuracy:.4f}\n")
print("Classification Report of Training Results:")
print(train_report)

# predict on the test set and report accuracy
test_predictions = rfc.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
test_report = classification_report(y_test, test_predictions)
print(f"Testing Accuracy: {test_accuracy:.4f}")
print("Classification Report of Testing Results:")
print(test_report)

# create a confusion matrix to visualise the test results
cm = confusion_matrix(y_test, test_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=np.unique(y_test), yticklabels=np.unique(y_test))
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

"""RandomForestClassifier with tuning:"""

# initialise a range of hyperparameters to loop over
n_estimators_range = [1, 3, 5, 10, 50, 100, 250, 500, 1000]
max_depth_range = [1, 5, 10, 20, 30, None]

# matrix to store the accuracy of each hyperparameter pair
accuracy_matrix = np.zeros((len(max_depth_range), len(n_estimators_range)))

# variables to track the best accuracy and corresponding hyperparameters
best_accuracy = 0
best_n_estimators = None
best_max_depth = None

# loop over each hyperparameter combination
for i, max_depth in enumerate(max_depth_range):
    for j, n_estimators in enumerate(n_estimators_range):
        rfc = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=0)
        rfc.fit(X_train, y_train)
        y_pred = rfc.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_matrix[i, j] = accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_n_estimators = n_estimators
            best_max_depth = max_depth

# heatmap of accuracies
plt.figure(figsize=(10, 6))
sns.heatmap(accuracy_matrix, annot=True, fmt=".3f", cmap="YlGnBu",
            xticklabels=n_estimators_range,
            yticklabels=[str(depth) if depth is not None else "None" for depth in max_depth_range])
plt.title('Accuracy for different n_estimators and max_depth values')
plt.xlabel('n_estimators')
plt.ylabel('max_depth')
plt.show()

print(f"Best Accuracy: {best_accuracy:.3f}")
print(f"Best n_estimators: {best_n_estimators}")
print(f"Best max_depth: {best_max_depth}")
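"""Side note (not part of the original notebook): the same random forest grid could
also be searched with scikit-learn's GridSearchCV, which cross-validates on the
training data rather than selecting hyperparameters against the test set. A minimal
sketch, assuming the same X_train/y_train and the same parameter ranges as above:
"""

from sklearn.model_selection import GridSearchCV

# hypothetical alternative to the manual loops above: 5-fold CV over the same grid
param_grid = {
    'n_estimators': [1, 3, 5, 10, 50, 100, 250, 500, 1000],
    'max_depth': [1, 5, 10, 20, 30, None],
}
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=0),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print(f"Best CV Accuracy: {grid_search.best_score_:.3f}")
print(f"Best Parameters: {grid_search.best_params_}")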
"""SVC classifier with default parameters on the unprocessed data"""

# SVC with default parameters
svc = SVC()
svc.fit(X_train, y_train)

# get training accuracy
svc_train_predictions = svc.predict(X_train)
svc_train_accuracy = accuracy_score(y_train, svc_train_predictions)
svc_train_report = classification_report(y_train, svc_train_predictions)
print(f"Training Accuracy: {svc_train_accuracy:.4f}\n")
print("Classification Report of Training Results:")
print(svc_train_report)

# get testing accuracy
svc_test_predictions = svc.predict(X_test)
svc_test_accuracy = accuracy_score(y_test, svc_test_predictions)
svc_test_report = classification_report(y_test, svc_test_predictions)
print(f"Testing Accuracy: {svc_test_accuracy:.4f}")
print("Classification Report of Testing Results:")
print(svc_test_report)

# confusion matrix of the testing results
cm = confusion_matrix(y_test, svc_test_predictions)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['No', 'Yes'], yticklabels=['No', 'Yes'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix for Testing Results')
plt.show()

"""SVC with hyperparameter tuning"""

# initialise a range of hyperparameters to loop over
C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']

# matrix to store the accuracy of each hyperparameter pair
accuracy_matrix = np.zeros((len(kernel_types), len(C_values)))

# variables to track the best accuracy and corresponding hyperparameters
best_accuracy = 0
best_C = None
best_kernel = None

# loop over each hyperparameter combination
for i, kernel in enumerate(kernel_types):
    for j, C in enumerate(C_values):
        svc = SVC(C=C, kernel=kernel)
        svc.fit(X_train, y_train)
        svc_test_predictions = svc.predict(X_test)
        accuracy = accuracy_score(y_test, svc_test_predictions)
        accuracy_matrix[i, j] = accuracy
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_C = C
            best_kernel = kernel

# heatmap of accuracies
plt.figure(figsize=(10, 6))
sns.heatmap(accuracy_matrix, annot=True, fmt=".2f", cmap='Blues',
            xticklabels=[f"{C:.3f}" for C in C_values], yticklabels=kernel_types)
plt.title('Accuracy for different C values and kernel types')
plt.xlabel('C Value')
plt.ylabel('Kernel Type')
plt.show()

print(f"Best Accuracy: {best_accuracy:.3f}")
print(f"Best C: {best_C:.3f}")
print(f"Best Kernel: {best_kernel}")
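"""Side note (not part of the original notebook): SVC is sensitive to feature
scales, and StandardScaler is imported in the set-up cell but never used. A minimal
sketch of how scaling could be added through a Pipeline, assuming the same
X_train/X_test split and reusing the tuned C and kernel found above:
"""

from sklearn.pipeline import make_pipeline

# hypothetical variant: standardise features before fitting the SVC
scaled_svc = make_pipeline(StandardScaler(), SVC(C=best_C, kernel=best_kernel))
scaled_svc.fit(X_train, y_train)
scaled_accuracy = accuracy_score(y_test, scaled_svc.predict(X_test))
print(f"Testing Accuracy with scaling: {scaled_accuracy:.4f}")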