Files
fyp/server/src/functions/fetch_transient_data/lambda_function.py

219 lines
7.2 KiB
Python

import json
import csv
import xmltodict
import requests
import os
import boto3
import time
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv
import re
# Create a reusable session for requests
session = requests.Session()
# Setup DynamoDB client
os.environ.setdefault('AWS_DEFAULT_REGION', 'us-east-1')
dynamodb = boto3.resource("dynamodb")
table_name = os.environ.get("DYNAMODB_TABLE", "transient_data")
table = dynamodb.Table(table_name)
timestamp = str(int(time.time()))
# API URLs
irishrail_url = "http://api.irishrail.ie/realtime/realtime.asmx/"
def fetch_trains():
"""
Fetches train data from the Irish Rail API and parses additional attributes.
Returns:
list: A list of dictionaries containing processed train data.
"""
print("Fetching Irish Rail data.")
api_function = "getCurrentTrainsXML_WithTrainType?TrainType="
train_types = ["M", "S", "D"]
trains = []
for train_type in train_types:
response = session.get(irishrail_url + api_function + train_type)
response.raise_for_status()
trains_xml = response.text
trains_json = xmltodict.parse(trains_xml)
for train in trains_json["ArrayOfObjTrainPositions"]["objTrainPositions"]:
train_code = str(train["TrainCode"])
train_status = train["TrainStatus"]
public_message = train["PublicMessage"]
split_message = public_message.split("\\n")
trainDetails = split_message[1].split("(")[0]
trainUpdate = split_message[2]
# Regex to extract punctuality: Matches positive/negative number followed by "mins late"
match = re.search(r"(-?\d+)\s+mins\s+late", public_message)
punctuality = int(match.group(1)) if match else 0 # Default to 0 if no match
if punctuality < 0:
punctuality_status = "early"
lateness_message = f"{-punctuality} minute{'s' if punctuality != -1 else ''} early"
elif punctuality == 0:
punctuality_status = "on-time"
lateness_message = "On time"
else:
punctuality_status = "late"
lateness_message = f"{punctuality} minute{'s' if punctuality != 1 else ''} late"
train_type_full = {
"M": "Mainline",
"S": "Suburban",
"D": "DART"
}.get(train_type, "Unknown")
train_status_full = {
"R": "Running",
"T": "Terminated",
"N": "Not yet running"
}.get(train_status, "Unknown")
trains.append({
"objectID": "IrishRailTrain-" + train_code,
"objectType": "IrishRailTrain",
"timestamp": timestamp,
"latitude": str(train["TrainLatitude"]),
"longitude": str(train["TrainLongitude"]),
"trainCode": train_code,
"trainType": train_type,
"trainTypeFull": train_type_full,
"trainStatus": train_status,
"trainStatusFull": train_status_full,
"trainDate": str(train["TrainDate"]),
"trainPublicMessage": public_message,
"trainDirection": train["Direction"],
"trainPunctuality": punctuality,
"trainPunctualityStatus": punctuality_status,
"latenessMessage": lateness_message,
"trainDetails": trainDetails,
"trainUpdate": trainUpdate
})
return trains
def fetch_bus_routes():
"""
Fetches bus route data from the permanent data API.
Returns:
list: A list of dictionaries containing bus route data.
"""
permanent_data_api = os.environ["PERMANENT_DATA_API"]
response = session.get(permanent_data_api + "?objectType=BusRoute")
response.raise_for_status()
return response.json()
def fetch_buses():
"""
Fetches bus data from the National Transport API.
Returns:
list: A list of dictionaries containing bus data.
"""
print("Fetching bus data.")
buses = []
api_url = "https://api.nationaltransport.ie/gtfsr/v2/Vehicles?format=json"
headers = {
"Cache-Control": "no-cache",
"x-api-key": os.getenv("GTFS_KEY")
}
response = session.get(api_url, headers=headers)
response.raise_for_status()
buses_json = response.json()
bus_routes_list = fetch_bus_routes()
bus_routes_hashmap = {route["busRouteID"]: route for route in bus_routes_list if "busRouteID" in route}
for bus in buses_json["entity"]:
busRouteID = str(bus["vehicle"]["trip"]["route_id"])
route_info = bus_routes_hashmap.get(busRouteID, {})
buses.append({
"objectID": "Bus-" + bus["id"],
"objectType": "Bus",
"timestamp": timestamp,
"latitude": str(bus["vehicle"]["position"]["latitude"]),
"longitude": str(bus["vehicle"]["position"]["longitude"]),
"busID": str(bus["id"]),
"busTripID": str(bus["vehicle"]["trip"]["trip_id"]),
"busStartTime": str(bus["vehicle"]["trip"]["start_time"]),
"busStartDate": str(bus["vehicle"]["trip"]["start_date"]),
"busScheduleRelationship": str(bus["vehicle"]["trip"]["schedule_relationship"]),
"busRoute": busRouteID,
"busRouteAgencyName": route_info.get("busRouteAgencyName", ""),
"busRouteLongName": route_info.get("busRouteLongName", ""),
"busRouteShortName": route_info.get("busRouteShortName", ""),
"busDirection": str(bus["vehicle"]["trip"]["direction_id"]),
})
return buses
def batch_upload_to_dynamodb(data):
"""
Uploads data to DynamoDB in batches.
Args:
data (list): A list of dictionaries containing data to be uploaded.
"""
with table.batch_writer() as batch:
for item in data:
batch.put_item(Item=item)
def lambda_handler(event, context):
"""
AWS Lambda handler function to fetch and upload data.
Args:
event (dict): Event data passed to the Lambda function.
context (object): Runtime information of the Lambda function.
Returns:
dict: A dictionary containing the status code and message.
"""
print("Lambda handler triggered; fetching data.")
with ThreadPoolExecutor() as executor:
futures = [
executor.submit(fetch_trains),
executor.submit(fetch_buses)
]
data = []
for future in futures:
data.extend(future.result())
print(f"Retrieved {len(data)} records.")
print("Uploading to DynamoDB...")
batch_upload_to_dynamodb(data)
print("Upload completed.")
return {
'statusCode': 200,
'body': json.dumps({'message': 'Data uploaded successfully!'})
}
if __name__ == "__main__":
"""
Main function to fetch and print data locally.
"""
load_dotenv()
with ThreadPoolExecutor() as executor:
futures = [
executor.submit(fetch_trains),
executor.submit(fetch_buses)
]
data = []
for future in futures:
data.extend(future.result())
print(json.dumps(data))