Sophie

Sophie

distrib > Mageia > 4 > x86_64 > by-pkgid > 4ad3d1a5da4da0b6f17e448e62d8ca92 > files > 18

perl-Web-Scraper-0.370.0-3.mga4.noarch.rpm

#!/usr/bin/perl
# Extract tags from web pages that use the rel-tag microformat
use strict;
use warnings;
use URI;
use URI::Escape;
use Web::Scraper;
use YAML;

# The page to scrape is taken from the first command-line argument.
my $uri = shift or die "Usage: rel-tag.pl URL\n";

# Build a scraper that produces one label for every <a> element whose rel
# attribute contains the token "tag" (the rel-tag microformat). The labels
# are collected under the "tags" key of the scrape result.
my $scraper = scraper {
    process 'a[rel~="tag"]', 'tags[]' => sub {
        # Named $tag_uri so it does not shadow the file-level $uri.
        my $tag_uri = URI->new($_->attr('href'));

        # The tag is the last non-empty path segment; skipping empty
        # segments copes with a trailing slash (".../tag/perl/").
        my $label = (grep { length } split m{/}, $tag_uri->path)[-1];

        # An href without any path segment (missing href, "/", "#frag", ...)
        # carries no tag. Bail out here rather than run s/// on undef, which
        # raised an "uninitialized value" warning.
        # NOTE(review): Web::Scraper appears to call this callback in list
        # context for 'name[]' keys, in which case the bare return drops the
        # entry altogether; otherwise it yields undef as before -- confirm.
        return unless defined $label;

        # Turn "+" into %20 *before* unescaping, so that "+" decodes to a
        # space while an escaped literal plus (%2B) still decodes to "+".
        $label =~ s/\+/%20/g;
        return uri_unescape($label);
    };
};

# Fetch and scrape the page, then emit the result as YAML through warn
# (i.e. on STDERR).
warn Dump $scraper->scrape(URI->new($uri));