Move all scripts to 'src' directory

This commit is contained in:
2025-01-09 21:30:48 +00:00
parent 147d81e546
commit a2075e8363
4 changed files with 0 additions and 0 deletions

42
src/focloir_wotd.pl Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/perl
# Script to scrape the Irish dictionary website focloir.ie and turn the "Word of the Day" into an RSS item
#
# Output: a complete single-item RSS 2.0 document on STDOUT.
# On failure (download error or unexpected page layout) the script dies with
# a message on STDERR before anything has been written to STDOUT.
use strict;
use warnings;
use HTML::TreeBuilder;
use Encode;
use POSIX qw(strftime setlocale LC_TIME);

# use UTF-8 when writing to STDOUT
binmode(STDOUT, ":encoding(utf8)");

# Escape the characters that are significant inside XML text content.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Make text safe to embed in a CDATA section by splitting any "]]>" sequence,
# which would otherwise terminate the section early.
sub cdata_safe {
    my ($text) = @_;
    $text =~ s/\]\]>/]]]]><![CDATA[>/g;
    return $text;
}

# Return the first descendant of $parent matching @criteria (as accepted by
# look_down), or die with a message naming what was being looked for.
sub find_element {
    my ($parent, $what, @criteria) = @_;
    my $element = $parent->look_down(@criteria);
    die "Could not find $what on focloir.ie - has the page layout changed?\n"
        unless defined $element;
    return $element;
}

my $url = "https://www.focloir.ie/en/";
my $user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"; # focloir.ie blocks curl and wget without this

my $html = `curl --user-agent "$user_agent" "$url"`;
# Report a failed download as such instead of failing later while parsing.
die "curl failed to fetch $url (wait status $?)\n"
    if $? != 0 || !defined $html || $html eq '';

my $tree = HTML::TreeBuilder->new;
$tree->parse(decode('UTF-8', $html));
$tree->eof;

my $div    = find_element($tree,   "the word-of-the-day entry",  _tag => "div", class => "wotdEntry");
my $header = find_element($div,    "the word-of-the-day header", _tag => "div", class => "wotdEntryHdr");
my $a_tag  = find_element($header, "the word-of-the-day link",   _tag => "a");
my $span   = find_element($a_tag,  "the word-of-the-day text",   _tag => "span");
my $body   = find_element($div,    "the word-of-the-day body",   _tag => "div", class => "wotdEntryBody");

my $word  = $span->as_text;
my $link  = $a_tag->attr("href");
my $entry = $body->as_text;
die "The word-of-the-day link on focloir.ie has no href attribute\n" unless defined $link;

# RSS requires RFC 822 dates with English day/month names, so force the C
# locale. Formatting in-process also avoids the trailing newline that the
# output of the external `date` command would carry into <pubDate>.
setlocale(LC_TIME, "C");
my $pub_date = strftime("%a, %d %b %Y %H:%M:%S %z", localtime);

my $title       = xml_escape($word);
my $item_link   = xml_escape($link);
my $description = cdata_safe($word . "<br>" . $entry);

print <<"END_RSS";

<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel>
<title>Focloir.ie: Focal an Lae</title>
<link>${url}</link>
<item>
<title>${title}</title>
<link>${item_link}</link>
<pubDate>${pub_date}</pubDate>
<description><![CDATA[${description}]]></description>
</item>
</channel>
</rss>
END_RSS

# Explicitly free the parse tree.
$tree = $tree->delete;

58
src/roisin_dubh_listings.pl Executable file
View File

@ -0,0 +1,58 @@
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use JSON;
use Date;
use Date::Parse;
# use UTF-8 when writing to STDOUT
binmode(STDOUT, ":encoding(utf8)");
# Render a flag value from the listings feed as human-readable text.
# Returns "Yes" only when the value is exactly the string "1"; any other
# value yields "No". An undefined value (missing or null field) is treated
# as "No" without triggering an "uninitialized value" warning.
sub yes_or_no {
    my ($boolean) = @_;
    return "Yes" if defined $boolean && $boolean eq "1";
    return "No";
}
# Escape the characters that are significant inside XML text content.
# An undefined value is rendered as the empty string.
sub xml_escape {
    my ($text) = @_;
    $text //= '';
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Make text safe to embed in a CDATA section by splitting any "]]>" sequence,
# which would otherwise terminate the section early.
# An undefined value is rendered as the empty string.
sub cdata_safe {
    my ($text) = @_;
    $text //= '';
    $text =~ s/\]\]>/]]]]><![CDATA[>/g;
    return $text;
}

# Download the listings feed. curl's status is checked so that a network
# failure is reported as such instead of surfacing as a JSON parse error.
my $listings_url = "https://roisindubh.net/remote/searchlistings.json";
my $json = `curl "$listings_url"`;
die "curl failed to fetch $listings_url (wait status $?)\n"
    if $? != 0 || !defined $json || $json eq '';

my $listings = decode_json($json)->{results};
die "No 'results' array in the response from $listings_url\n"
    unless ref($listings) eq 'ARRAY';

print("
<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel><title>Róisín Dubh Listings</title><link>https://roisindubh.net/listings/</link>");

my $now = Date::now();
foreach my $listing (@$listings) {
    # str2time returns undef for input it cannot parse; skip such listings
    # rather than comparing and formatting an undefined timestamp.
    my $event_date = str2time($listing->{event_date_time} // '');
    next unless defined $event_date;

    # only print data if event date is in the future
    next unless $event_date > $now;

    my $title     = cdata_safe($listing->{pagetitle});
    my $alias     = xml_escape($listing->{alias});
    my $pub_date  = Date::strftime("%a, %d %b %Y %H:%M:%S %z", $event_date);
    my $intro     = cdata_safe($listing->{introtext});
    my $content   = cdata_safe($listing->{content});
    my $location  = cdata_safe($listing->{name});
    my $starts_at = Date::strftime("%Y-%m-%d %a %H:%M:%S", $event_date);

    my $late_night        = yes_or_no($listing->{late_night});
    my $postponed         = yes_or_no($listing->{postponed});
    my $tickets_remaining = yes_or_no($listing->{ticket_remaining});
    my $on_sale           = yes_or_no($listing->{on_sale});

    # Only dereference 'prices' when it really is a hash, so a null or scalar
    # value cannot abort the whole feed under "use strict".
    my $prices     = $listing->{prices};
    my $price      = cdata_safe(ref($prices) eq 'HASH' ? $prices->{regular} : undef);
    my $allocation = cdata_safe($listing->{ticket_allocation});

    my $sales_start      = str2time($listing->{sales_start} // '');
    my $sales_start_text = defined $sales_start
        ? Date::strftime("%Y-%m-%d %a %H:%M:%S", $sales_start)
        : "Unknown";

    print <<"END_ITEM";

<item>
<title><![CDATA[${title}]]></title>
<link>https://roisindubh.net/listings/${alias}</link>
<pubDate>${pub_date}</pubDate>
<description>
<![CDATA[
${intro}
${content}
Location: ${location}<br>
Event start time: ${starts_at}<br>
Late night?: ${late_night}<br>
Postponed?: ${postponed}<br>
<br>
Ticket Price: €${price}<br>
Ticket Allocation: ${allocation}<br>
Tickets remaining?: ${tickets_remaining}<br>
<br>
Sales start time: ${sales_start_text}<br>
On Sale?: ${on_sale}<br>
]]>
</description>
</item>
END_ITEM
}
print("</channel></rss>");

43
src/roisin_dubh_listings.py Executable file
View File

@ -0,0 +1,43 @@
#!/bin/python3
# Script to consume the event listings JSON feed for the Róisín Dubh pub in Galway and generate an RSS feed
# The page does appear to offer an RSS feed but it's broken and returns dates in the far future and distant past
#
# Output: an RSS 2.0 document on stdout containing one <item> per listing
# whose event start time has not yet passed.
import requests
from datetime import datetime

LISTINGS_URL = 'https://roisindubh.net/remote/searchlistings.json'
# Format of the timestamps in the feed; they carry no timezone information.
EVENT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

# A timeout stops the script hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error instead of a confusing JSON error.
response = requests.get(LISTINGS_URL, timeout=30)
response.raise_for_status()
listings = response.json()['results']

# using several print statements to prioritise code readability over efficiency (I/O speeds are unimportant to me)
print('<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">')
print('<channel><title>Róisín Dubh Listings</title><link>https://roisindubh.net/listings/</link>')
for listing in listings:
    # Parse once and reuse for both the "is it in the future" filter and pubDate.
    event_time = datetime.strptime(listing['event_date_time'], EVENT_TIME_FORMAT)
    if event_time < datetime.now():
        continue
    print('<item>')
    print('<title><![CDATA[' + listing['pagetitle'] + ']]></title>')
    print('<link>https://roisindubh.net/listings/' + listing['alias'] + '</link>')
    print('<description> <![CDATA[')
    print(listing['introtext'] + '\n' + listing['content'] + '\n')
    print('Ticket Price: €' + str(listing['prices']['regular']) + '<br>')
    print('Location: ' + listing['name'] + '<br>')
    print('Ticket Allocation: ' + listing['ticket_allocation'] + '<br>')
    print('Tickets remaining?: ' + str(listing['ticket_remaining'] == '1') + '<br>')
    print('Event start time: ' + listing['event_date_time'] + '<br>')
    print('Late night?: ' + str(listing['late_night'] == '1') + '<br>')
    print('Postponed?: ' + str(listing['postponed'] == '1') + '<br>')
    print('Sales start time: ' + listing['sales_start'] + '<br>')
    print('On Sale?: ' + str(listing['on_sale'] == '1') + '<br>')
    # .get() so a listing without this key is simply printed without the link.
    external_ticket_url = listing.get('external_ticket_url')
    if external_ticket_url:
        print('External Ticket URL: <a href="' + external_ticket_url + '">' + external_ticket_url + '</a>')
    print(']]> </description>')
    # strftime's %z is empty for a naive datetime, which would leave pubDate
    # without a timezone. astimezone() interprets the naive value as local
    # time and attaches the local offset so %z is rendered.
    print('<pubDate>' + event_time.astimezone().strftime('%a, %d %b %Y %H:%M:%S %z') + '</pubDate>')
    print('</item>')
print('</channel></rss>')

41
src/teanglann_wotd.pl Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/perl
# Script to scrape the Irish dictionary website teanglann.ie and turn the "Word of the Day" into an RSS item
#
# Output: a complete single-item RSS 2.0 document on STDOUT.
# On failure (download error or unexpected page layout) the script dies with
# a message on STDERR before anything has been written to STDOUT.
use strict;
use warnings;
use HTML::TreeBuilder;
use Encode;
use POSIX qw(strftime setlocale LC_TIME);

# use UTF-8 when writing to STDOUT
binmode(STDOUT, ":encoding(utf8)");

# Escape the characters that are significant inside XML text content.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Make text safe to embed in a CDATA section by splitting any "]]>" sequence,
# which would otherwise terminate the section early.
sub cdata_safe {
    my ($text) = @_;
    $text =~ s/\]\]>/]]]]><![CDATA[>/g;
    return $text;
}

# Return the first descendant of $parent matching @criteria (as accepted by
# look_down), or die with a message naming what was being looked for.
sub find_element {
    my ($parent, $what, @criteria) = @_;
    my $element = $parent->look_down(@criteria);
    die "Could not find $what on teanglann.ie - has the page layout changed?\n"
        unless defined $element;
    return $element;
}

my $url = "https://www.teanglann.ie/en/";

my $html = `curl "$url"`;
# Report a failed download as such instead of failing later while parsing.
die "curl failed to fetch $url (wait status $?)\n"
    if $? != 0 || !defined $html || $html eq '';

my $tree = HTML::TreeBuilder->new;
$tree->parse(decode('UTF-8', $html));
$tree->eof;

my $div   = find_element($tree, "the word-of-the-day block", _tag => "div",  class => "wod");
my $a_tag = find_element($div,  "the headword link",         _tag => "a",    class => "headword");
my $entry = find_element($div,  "the word-of-the-day entry", _tag => "span", class => "entry");

my $word = $a_tag->as_text;
my $link = $a_tag->attr("href");
die "The headword link on teanglann.ie has no href attribute\n" unless defined $link;

# RSS requires RFC 822 dates with English day/month names, so force the C
# locale. Formatting in-process also avoids the trailing newline that the
# output of the external `date` command would carry into <pubDate>.
setlocale(LC_TIME, "C");
my $pub_date = strftime("%a, %d %b %Y %H:%M:%S %z", localtime);

my $title       = xml_escape($word);
my $item_path   = xml_escape($link);
my $description = cdata_safe($entry->as_text);

print <<"END_RSS";

<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel>
<title>Teanglann.ie: Focal an Lae</title>
<link>${url}</link>
<item>
<title>${title}</title>
<link>https://www.teanglann.ie${item_path}</link>
<pubDate>${pub_date}</pubDate>
<description><![CDATA[${description}]]></description>
</item>
</channel>
</rss>
END_RSS

# Explicitly free the parse tree.
$tree = $tree->delete;