Compare commits
10 Commits
76a93f546a
...
a2075e8363
Author | SHA1 | Date | |
---|---|---|---|
a2075e8363 | |||
147d81e546 | |||
e83f513313 | |||
c98f5a4d32 | |||
441ca95add | |||
cb15f1ef20 | |||
c9816cbcea | |||
e8d0b4818e | |||
b05e6bb8e0 | |||
3afb45ff8d |
18
README.md
18
README.md
@ -1,2 +1,20 @@
|
|||||||
# custom_rss_generators
|
# custom_rss_generators
|
||||||
A collection of scripts that generate custom RSS feeds
|
A collection of scripts that generate custom RSS feeds
|
||||||
|
|
||||||
|
The scripts in this repository do not by default generate output files,
|
||||||
|
instead only outputting to `stdout`.
|
||||||
|
This is quite elegant if your RSS feed reader supports reading in from
|
||||||
|
`stdout` as it avoids the generation of intermediary files.
|
||||||
|
For example, in my [`newsraft`](https://codeberg.org/newsraft/newsraft) `feeds`
|
||||||
|
configuration file, I have the following lines:
|
||||||
|
```
|
||||||
|
@ Events
|
||||||
|
$(python3 ~/code/python/custom_rss_generators/roisin_dubh_listings.py 2>/dev/null) " Róisín Dubh Event Listings"
|
||||||
|
```
|
||||||
|
|
||||||
|
However, if your RSS feed reader does not support reading in from `stdout` or running
|
||||||
|
executables to generate feeds, you will need to find another way to utilise these scripts, e.g.
|
||||||
|
generating `rss.xml` files on a cron schedule and reading from those in your feed reader by using
|
||||||
|
a link beginning with `file://` or if possible, doing some kind of command injection attack
|
||||||
|
via your configuration file to force your feed reader to execute the script (although if successful
|
||||||
|
& not a deliberate design feature this likely indicates a security issue with your feed reader).
|
||||||
|
42
src/focloir_wotd.pl
Executable file
42
src/focloir_wotd.pl
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
# Script to scrape the Irish dictionary website focloir.ie and turn the "Word of the Day" into an RSS item
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use HTML::TreeBuilder;
|
||||||
|
use Encode;
|
||||||
|
|
||||||
|
# use UTF-8 when writing to STDOUT
|
||||||
|
binmode(STDOUT, ":encoding(utf8)");
|
||||||
|
|
||||||
|
# Front page of focloir.ie; it carries the "Word of the Day" panel scraped below.
my $url = "https://www.focloir.ie/en/";
my $user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"; # focloir.ie blocks curl and wget without this
my $html = `curl --user-agent "$user_agent" "$url"`;

# Fail with a clear message instead of an opaque "method on undefined value"
# error further down when the download produced nothing.
die "focloir_wotd: curl returned no HTML for $url\n"
    unless defined $html && length $html;

my $tree = HTML::TreeBuilder->new;
$tree->parse(decode('UTF-8', $html));
$tree->eof;

# look_down returns undef when nothing matches, so check every step of the
# descent: a markup change on the site should be reported, not crash.
my $div = $tree->look_down(_tag => "div", class => "wotdEntry")
    or die "focloir_wotd: no div.wotdEntry found in $url\n";
my $header = $div->look_down(_tag => "div", class => "wotdEntryHdr")
    or die "focloir_wotd: no div.wotdEntryHdr found in $url\n";
my $a_tag = $header->look_down(_tag => "a")
    or die "focloir_wotd: no link found in the word-of-the-day header\n";
my $word_span = $a_tag->look_down(_tag => "span")
    or die "focloir_wotd: no span found in the word-of-the-day link\n";
my $body = $div->look_down(_tag => "div", class => "wotdEntryBody")
    or die "focloir_wotd: no div.wotdEntryBody found in $url\n";

my $word = $word_span->as_text;
my $link = $a_tag->attr("href");
my $entry = $body->as_text;

# RSS pubDate is an RFC 822 date: force the C locale so day/month names are
# English, and strip the trailing newline that `date` prints.
chomp(my $pub_date = `LC_ALL=C date "+%a, %d %b %Y %H:%M:%S %z"`);

# Emit a single-item RSS 2.0 feed on STDOUT.
print("
<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel>
<title>Focloir.ie: Focal an Lae</title>
<link>" . $url . "</link>

<item>
<title>" . $word . "</title>
<link>" . $link . "</link>
<pubDate>" . $pub_date . "</pubDate>
<description><![CDATA[" . $word . "<br>" . $entry . "]]></description>
</item>
</channel>
</rss>
");

# Explicitly free the parse tree.
$tree = $tree->delete;
|
58
src/roisin_dubh_listings.pl
Executable file
58
src/roisin_dubh_listings.pl
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use utf8;
|
||||||
|
use JSON;
|
||||||
|
use Date;
|
||||||
|
use Date::Parse;
|
||||||
|
|
||||||
|
# use UTF-8 when writing to STDOUT
|
||||||
|
binmode(STDOUT, ":encoding(utf8)");
|
||||||
|
|
||||||
|
# Render a flag value as human-readable text.
#
# Parameters:
#   $boolean - the flag value; may be undef.
# Returns:
#   "Yes" when $boolean stringifies to "1", otherwise "No".
sub yes_or_no {
    my ($boolean) = @_;

    # An undef flag would trigger an "uninitialized value" warning in the
    # string comparison below; treat it as "No" explicitly.
    return "No" unless defined $boolean;

    return $boolean eq "1" ? "Yes" : "No";
}
|
||||||
|
|
||||||
|
# Download the listings JSON with curl and keep the array stored under "results".
my $listings = decode_json(`curl "https://roisindubh.net/remote/searchlistings.json"`)->{results};

# Open the RSS document and the channel; items are appended by the loop below.
print("
<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel><title>Róisín Dubh Listings</title><link>https://roisindubh.net/listings/</link>");

foreach my $listing (@$listings) {
    my $event_date = str2time($listing->{event_date_time});

    # str2time returns undef when it cannot parse its input; skip such
    # listings rather than comparing undef numerically (which warns).
    next unless defined $event_date;

    # only print data if event date is in the future
    next unless $event_date > Date::now();

    # One <item> per upcoming event; free-text fields go inside CDATA.
    print("
<item>
<title><![CDATA[" . $listing->{pagetitle} . "]]></title>
<link>https://roisindubh.net/listings/" . $listing->{alias} . "</link>
<pubDate>" . Date::strftime("%a, %d %b %Y %H:%M:%S %z", $event_date) . "</pubDate>

<description>
<![CDATA[
" . $listing->{introtext} . "

" . $listing->{content} . "

Location: " . $listing->{name} . "<br>
Event start time: " . Date::strftime("%Y-%m-%d %a %H:%M:%S", $event_date) . "<br>
Late night?: " . yes_or_no($listing->{late_night}) . "<br>
Postponed?: " . yes_or_no($listing->{postponed}) . "<br>
<br>
Ticket Price: €" . $listing->{prices}->{regular} . "<br>
Ticket Allocation: " . $listing->{ticket_allocation} . "<br>
Tickets remaining?: " . yes_or_no($listing->{ticket_remaining}) . "<br>
<br>
Sales start time: " . Date::strftime("%Y-%m-%d %a %H:%M:%S", str2time($listing->{sales_start})) . "<br>
On Sale?: " . yes_or_no($listing->{on_sale}) . "<br>
]]>
</description>
</item>
");
}

# Close the channel and the document.
print("</channel></rss>");
|
@ -16,7 +16,7 @@ for listing in listings:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
print('<item>')
|
print('<item>')
|
||||||
print('<title>' + listing['pagetitle'] + '</title>')
|
print('<title><![CDATA[' + listing['pagetitle'] + ']]></title>')
|
||||||
print('<link>https://roisindubh.net/listings/' + listing['alias'] + '</link>')
|
print('<link>https://roisindubh.net/listings/' + listing['alias'] + '</link>')
|
||||||
|
|
||||||
print('<description> <![CDATA[')
|
print('<description> <![CDATA[')
|
41
src/teanglann_wotd.pl
Executable file
41
src/teanglann_wotd.pl
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
# Script to scrape the Irish dictionary website teanglann.ie and turn the "Word of the Day" into an RSS item
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use HTML::TreeBuilder;
|
||||||
|
use Encode;
|
||||||
|
|
||||||
|
# use UTF-8 when writing to STDOUT
|
||||||
|
binmode(STDOUT, ":encoding(utf8)");
|
||||||
|
|
||||||
|
# Front page of teanglann.ie; it carries the "Word of the Day" panel scraped below.
my $url = "https://www.teanglann.ie/en/";
my $html = `curl "$url"`;

# Fail with a clear message instead of an opaque "method on undefined value"
# error further down when the download produced nothing.
die "teanglann_wotd: curl returned no HTML for $url\n"
    unless defined $html && length $html;

my $tree = HTML::TreeBuilder->new;
$tree->parse(decode('UTF-8', $html));
$tree->eof;

# look_down returns undef when nothing matches, so check each lookup: a
# markup change on the site should be reported, not crash.
my $div = $tree->look_down(_tag => "div", class => "wod")
    or die "teanglann_wotd: no div.wod found in $url\n";
my $a_tag = $div->look_down(_tag => "a", class => "headword")
    or die "teanglann_wotd: no a.headword found in the word-of-the-day panel\n";
my $entry = $div->look_down(_tag => "span", class => "entry")
    or die "teanglann_wotd: no span.entry found in the word-of-the-day panel\n";

my $word = $a_tag->as_text;
my $link = $a_tag->attr("href");

# RSS pubDate is an RFC 822 date: force the C locale so day/month names are
# English, and strip the trailing newline that `date` prints.
chomp(my $pub_date = `LC_ALL=C date "+%a, %d %b %Y %H:%M:%S %z"`);

# Emit a single-item RSS 2.0 feed on STDOUT. The scraped href is appended to
# the site root to form the item link.
print("
<rss xmlns:atom='http://www.w3.org/2005/Atom' version='2.0'>
<channel>
<title>Teanglann.ie: Focal an Lae</title>
<link>" . $url . "</link>

<item>
<title>" . $word . "</title>
<link>https://www.teanglann.ie" . $link . "</link>
<pubDate>" . $pub_date . "</pubDate>
<description><![CDATA[" . $entry->as_text . "]]></description>
</item>
</channel>
</rss>
");

# Explicitly free the parse tree.
$tree = $tree->delete;
|
Reference in New Issue
Block a user