From c98f5a4d324627014c4a09f1f6eee91521881cbf Mon Sep 17 00:00:00 2001
From: Andrew
Date: Fri, 8 Nov 2024 02:40:38 +0000
Subject: [PATCH] [teanglann]: Add teanglann_word_of_the_day.pl

---
 teanglann_word_of_the_day.pl | 99 ++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100755 teanglann_word_of_the_day.pl

diff --git a/teanglann_word_of_the_day.pl b/teanglann_word_of_the_day.pl
new file mode 100755
index 0000000..3b63b88
--- /dev/null
+++ b/teanglann_word_of_the_day.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+# Script to scrape the Irish dictionary website teanglann.ie and turn the
+# "Word of the Day" into a single-item RSS 2.0 feed printed to STDOUT.
+#
+# The feed uses the standard RSS 2.0 element names: a channel with title, link
+# and description, containing one item with title, link, pubDate and description.
+use strict;
+use warnings;
+use HTML::TreeBuilder;
+use Encode;
+
+# use UTF-8 when writing to STDOUT
+binmode(STDOUT, ":encoding(UTF-8)");
+
+my $site = "https://www.teanglann.ie";
+my $url  = "$site/en/";
+
+# English day and month abbreviations. RFC 822 dates (used by RSS pubDate)
+# must use these regardless of the locale the script runs under.
+my @day_names   = qw(Sun Mon Tue Wed Thu Fri Sat);
+my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
+
+# Escape the characters that are special in XML element content.
+sub xml_escape {
+    my ($text) = @_;
+    $text =~ s/&/&amp;/g;
+    $text =~ s/</&lt;/g;
+    $text =~ s/>/&gt;/g;
+    return $text;
+}
+
+# Make text safe inside a CDATA section: a literal "]]>" would end the section
+# early, so split it across two adjacent CDATA sections.
+sub cdata_escape {
+    my ($text) = @_;
+    $text =~ s/\]\]>/]]]]><![CDATA[>/g;
+    return $text;
+}
+
+# Format an epoch time as an RFC 822 date in UTC, with no trailing newline.
+sub rfc822_date {
+    my ($epoch) = @_;
+    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime($epoch);
+    return sprintf("%s, %02d %s %04d %02d:%02d:%02d +0000",
+        $day_names[$wday], $mday, $month_names[$mon], $year + 1900,
+        $hour, $min, $sec);
+}
+
+# Fetch the page with curl. The list form of open runs curl without a shell;
+# --fail makes HTTP errors produce a non-zero exit status, and
+# --silent --show-error hides the progress meter but keeps error messages.
+open(my $curl, "-|", "curl", "--silent", "--show-error", "--fail", $url)
+    or die "Cannot run curl: $!\n";
+my $html = do { local $/; <$curl> };
+close($curl)
+    or die "curl failed to fetch $url (exit status " . ($? >> 8) . ")\n";
+die "Empty response from $url\n" unless defined $html && length $html;
+
+my $tree = HTML::TreeBuilder->new;
+$tree->parse(decode('UTF-8', $html));
+$tree->eof;
+
+# look_down returns undef when nothing matches, so fail with a clear message
+# instead of calling a method on undef if the page layout changes.
+my $div = $tree->look_down(_tag => "div", class => "wod")
+    or die "Could not find div.wod (word of the day) at $url\n";
+my $a_tag = $div->look_down(_tag => "a", class => "headword")
+    or die "Could not find a.headword inside div.wod at $url\n";
+my $entry = $div->look_down(_tag => "span", class => "entry")
+    or die "Could not find span.entry inside div.wod at $url\n";
+
+my $word = $a_tag->as_text;
+my $link = $a_tag->attr("href");
+die "The headword link has no href attribute\n" unless defined $link;
+
+# Escape everything that is interpolated into the XML below.
+my $item_title  = xml_escape($word);
+my $item_link   = xml_escape($site . $link);
+my $pub_date    = rfc822_date(time);
+my $description = cdata_escape($entry->as_text);
+
+print <<"END_RSS";
+<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+<channel>
+  <title>Teanglann.ie: Focal an Lae</title>
+  <link>$url</link>
+  <description>Word of the Day from teanglann.ie</description>
+  <item>
+    <title>$item_title</title>
+    <link>$item_link</link>
+    <pubDate>$pub_date</pubDate>
+    <description><![CDATA[${description}]]></description>
+  </item>
+</channel>
+</rss>
+END_RSS
+
+$tree = $tree->delete;