77 lines
2.6 KiB
Matlab
77 lines
2.6 KiB
Matlab
% Wipe the workspace so variables from earlier runs cannot affect this one.
clear;

% 1. Load the flight records and echo the row count
%    (expected: 336,766 records).
flights = readtable("Flights.csv");
numRecords = size(flights, 1)

% 2. Convert the "origin" and "dest" columns (cell type after import)
%    to string arrays.
for airportVar = ["origin", "dest"]
    flights.(airportVar) = string(flights.(airportVar));
end
|
|
|
|
|
|
% 3. Count the records whose departure delay is missing.
%    The mask is computed once and reused in step 4 so that the count and
%    the filter are guaranteed to agree.
isDelayMissing = ismissing(flights.dep_delay);
numMissing = sum(isDelayMissing)

% 4. Drop every record with a missing departure delay and report the
%    number of records before and after.
flights_clean = flights(~isDelayMissing, :);
numRecordsClean = height(flights_clean);
disp("Number of records in flights = " + numRecords);
disp("Number of records in flights_clean = " + numRecordsClean);

% 5. Confirm the difference in records between the two tables; it should
%    equal numMissing. The result is deliberately not stored in a variable
%    called "diff", which would shadow MATLAB's built-in diff function.
numRemoved = numRecords - numRecordsClean
|
|
|
|
% 6. Keep only flights whose departure delay is at most 2 hours
%    (120 minutes); 318,798 observations are expected to remain.
withinTwoHours = flights_clean.dep_delay <= 120;
flights_final = flights_clean(withinTwoHours, :);

% Echo the remaining row count.
height(flights_final)
|
|
|
|
% 7. Table and graph of the average departure delay per month.
months = unique(flights_final.Month);

% One row per month present in the data; averages are filled in below.
res1 = table(months, zeros(size(months)), 'VariableNames', {'Month', 'AvgDelayMonth'});

% Loop over row positions and look the month value up via months(k), so
% the result stays correct even when the months present are not exactly
% 1..N (e.g. a data set that starts in March or has a gap).
for k = 1:numel(months)
    inThisMonth = flights_final.Month == months(k);
    res1.AvgDelayMonth(k) = mean(flights_final.dep_delay(inThisMonth));
end
res1

% Draw in a new figure window so this plot is not overwritten by (and does
% not overwrite) any other plot produced by the script.
figure;
plot(res1.Month, res1.AvgDelayMonth, '-o');
title('Average Delay by Month');
|
|
|
|
% 8. Table and graph of the average departure delay per hour.
%    The hours are taken from the data rather than hard-coded, so hour 0 is
%    included when present. Only hours that actually occur get a row, so no
%    empty-group NaN averages have to be filtered out afterwards.
%    rmmissing guards against missing entries in the hour column.
hourValues = unique(rmmissing(flights_final.hour));

% One row per hour present in the data; averages are filled in below.
res2 = table(hourValues, zeros(size(hourValues)), 'VariableNames', {'Hour', 'AvgDelayHour'});

% Loop over row positions and look the hour value up via hourValues(k);
% hour values are not valid row indices (they can be 0 or have gaps).
for k = 1:numel(hourValues)
    inThisHour = flights_final.hour == hourValues(k);
    res2.AvgDelayHour(k) = mean(flights_final.dep_delay(inThisHour));
end
res2

% Draw in a new figure window so the previous plot is kept.
figure;
plot(res2.Hour, res2.AvgDelayHour, '-o');
title('Average Delay by Hour of the Day');
|
|
|
|
% 9. Table and graph of the average departure delay by month and by origin.
%    groupsummary yields one row per (Month, origin) pair; the GroupCount
%    column is dropped and the mean column is renamed for the final table.
res3 = groupsummary(flights_final, ["Month", "origin"], "mean", "dep_delay");
res3 = removevars(res3, 'GroupCount');
res3 = renamevars(res3, 'mean_dep_delay', 'AvrDelayMonthOrigin')

% One stacked subplot per origin airport, drawn in a new figure window so
% the subplots do not replace the axes of an earlier plot.
airports = ["JFK", "EWR", "LGA"];
figure;
for k = 1:numel(airports)
    isAirport = res3.origin == airports(k);
    subplot(numel(airports), 1, k);
    plot(res3.Month(isAirport), res3.AvrDelayMonthOrigin(isAirport), '-o');
    title(airports(k));
end
|