#!/usr/bin/perl
# Script to scrape the Irish dictionary website teanglann.ie and turn the
# "Word of the Day" into an RSS item.
#
# The page is fetched with curl, parsed with HTML::TreeBuilder, and a
# single-item RSS 2.0 feed is written to STDOUT. Any failure (fetch error,
# missing page element) aborts with a message on STDERR and a non-zero exit
# status, so a broken feed is never emitted.
use strict;
use warnings;
use HTML::TreeBuilder;
use Encode;
use POSIX qw(strftime setlocale LC_TIME);

# use UTF-8 when writing to STDOUT
binmode(STDOUT, ":encoding(utf8)");

my $url = "https://www.teanglann.ie/en/";

# Fetch the page. The list form of open() runs curl directly, without a
# shell, so nothing in $url is ever subject to shell interpretation.
# --fail makes curl exit non-zero on HTTP errors instead of returning the
# error page; --silent/--show-error suppress the progress meter but keep
# error messages.
open(my $curl, '-|', 'curl', '--fail', '--silent', '--show-error', $url)
    or die "Cannot run curl: $!\n";
my $html = do { local $/; <$curl> };
close($curl)
    or die "curl failed for $url (exit status " . ($? >> 8) . ")\n";
die "Empty response from $url\n"
    unless defined $html && length $html;

my $tree = HTML::TreeBuilder->new;
$tree->parse(decode('UTF-8', $html));
$tree->eof;

# Locate the "Word of the Day" box and the elements inside it. Each lookup
# is checked so that a change in the page layout gives a clear error rather
# than a "Can't call method on an undefined value" crash.
my $div = $tree->look_down(_tag => "div", class => "wod")
    or die "Could not find <div class=\"wod\"> in $url\n";
my $a_tag = $div->look_down(_tag => "a", class => "headword")
    or die "Could not find <a class=\"headword\"> in the word-of-the-day box\n";
my $entry = $div->look_down(_tag => "span", class => "entry")
    or die "Could not find <span class=\"entry\"> in the word-of-the-day box\n";

my $word = $a_tag->as_text;
my $link = $a_tag->attr("href");
die "Headword link has no href attribute\n" unless defined $link;

# The href is prefixed with the site root unless it is already absolute.
my $item_url = $link =~ m{\Ahttps?://}i
    ? $link
    : "https://www.teanglann.ie" . $link;

# RSS dates use English day and month abbreviations, so format the date in
# the "C" locale regardless of the user's environment.
setlocale(LC_TIME, "C");
my $pub_date = strftime("%a, %d %b %Y %H:%M:%S %z", localtime);

# Escape the scraped values before they are embedded in the XML output.
my $channel_link = xml_escape($url);
my $item_title   = xml_escape($word);
my $item_link    = xml_escape($item_url);
my $description  = cdata_escape($entry->as_text);

print <<"END_RSS";
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Teanglann.ie: Focal an Lae</title>
<link>${channel_link}</link>
<description>Word of the Day from teanglann.ie</description>
<item>
<title>${item_title}</title>
<link>${item_link}</link>
<pubDate>${pub_date}</pubDate>
<description><![CDATA[${description}]]></description>
</item>
</channel>
</rss>
END_RSS

$tree = $tree->delete;

# Return a copy of $text with the XML special characters & < > " replaced
# by their entity references, making it safe for element content.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Return a copy of $text that is safe inside a CDATA section: any "]]>"
# would end the section early, so it is split across two adjacent sections.
sub cdata_escape {
    my ($text) = @_;
    $text =~ s/\]\]>/]]]]><![CDATA[>/g;
    return $text;
}