Files
uni/second/semester2/CT248/Assignments/Assignment-08/six.m
2023-12-07 01:19:12 +00:00

77 lines
2.6 KiB
Matlab

clear;
% 1. Read in the file and confirm the number of records (336,766).
flights = readtable("Flights.csv");
numRecords = height(flights)
% 2. Convert "origin" & "dest" to strings (from cell type).
flights.origin = string(flights.origin);
flights.dest = string(flights.dest);
% 3. Check the number of missing values for the departure delay.
% The mask is computed once and reused in step 4 so that the count and the
% filter are guaranteed to agree.
missingDelay = ismissing(flights.dep_delay);
numMissing = sum(missingDelay)
% 4. Filter all the missing values from the departure delay and check the
% difference in the number of records.
flights_clean = flights(~missingDelay, :);
numRecordsClean = height(flights_clean);
disp("Number of records in flights = " + numRecords);
disp("Number of records in flights_clean = " + numRecordsClean);
% 5. Confirm the difference in records between the two tables.
% (Named numRemoved rather than diff so the built-in diff function is not
% shadowed in the workspace.)
numRemoved = numRecords - numRecordsClean
% 6. Remove any departure delay greater than 2 hours (120 minutes). This
% is expected to leave 318,798 observations.
flights_final = flights_clean(flights_clean.dep_delay <= 120, :);
numRecordsFinal = height(flights_final)
% 7. Generate the table and graph showing the average delay per month.
% The groups are taken from the Month values actually present in
% flights_final, so the result stays correct even when the months do not
% form the exact sequence 1..N (a loop that uses the month value as a row
% index silently misplaces results in that case).
res1 = groupsummary(flights_final, "Month", "mean", "dep_delay");
res1 = removevars(res1, 'GroupCount');
res1 = renamevars(res1, 'mean_dep_delay', 'AvgDelayMonth')
% Open a dedicated figure so that later plots do not overwrite this one.
figure;
plot(res1.Month, res1.AvgDelayMonth, '-o');
title('Average Delay by Month');
% 8. Generate the table and graph showing the average delay per hour.
% Grouping on the hour values found in the data yields one row per hour
% that has at least one flight: no empty rows have to be filtered out
% afterwards, and no hour value (e.g. 0) is skipped because of a
% hard-coded range. Rows whose hour is missing are excluded.
res2 = groupsummary(flights_final, "hour", "mean", "dep_delay", ...
    "IncludeMissingGroups", false);
res2 = removevars(res2, 'GroupCount');
res2 = renamevars(res2, ["hour", "mean_dep_delay"], ["Hour", "AvgDelayHour"])
% Open a dedicated figure so the previous plot is not replaced.
figure;
plot(res2.Hour, res2.AvgDelayHour, '-o');
title('Average Delay by Hour of the Day');
% 9. Generate the table and graph showing the average delay by month and
% by origin.
res3 = groupsummary(flights_final, ["Month", "origin"], "mean", "dep_delay");
res3 = removevars(res3, 'GroupCount');
res3 = renamevars(res3, 'mean_dep_delay', 'AvrDelayMonthOrigin')

% One stacked subplot per origin airport, drawn in its own figure so the
% subplots do not destroy the axes of an earlier plot.
airports = ["JFK", "EWR", "LGA"];
figure;
for k = 1:numel(airports)
    % Rows of the summary table that belong to the current airport.
    rows = res3.origin == airports(k);
    subplot(numel(airports), 1, k);
    plot(res3.Month(rows), res3.AvrDelayMonthOrigin(rows), '-o');
    title(airports(k));
end