#!/usr/bin/perl
# ------ extract-isa16.pl
# Make a list of sessions of 2016 Third ISA Forum of Sociology
#   2013/08/15 - 2013/09/19 (extract-isa14.pl)
#   2015/05/06
#
# Download the lists of sessions from ISA website:
#   wget -N -x http://www.isa-sociology.org/forum-2016/rc/joint-sessions.php
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/forum-2016/rc/
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/forum-2016/wg/
#   wget -N -x -r -l1 -np http://www.isa-sociology.org/forum-2016/tg/

# Script-wide I/O settings. These are deliberately package globals (no
# "use strict" here): the damaged code preserved below relies on undeclared
# globals such as $Count, $h1, $title and $e.
undef $/;          # no input record separator: a read returns the whole file
$\ = "\n";         # print appends a newline after its arguments
$" = $, = "\t";    # tab between interpolated list elements and print arguments
$Count = 0;        # counter; its use is not visible in the surviving text

# url( $path )
#   Turn a path string into an "http://..." URL.
#     - the FIRST '@' is replaced by '?' (no /g, so later '@' are kept)
#     - every backslash is replaced by '/'
#     - "http://" is prepended
#   Presumably $path is the local file name written by "wget -x" (which can
#   store '?' as '@' and, on Windows, uses '\' as separator) -- TODO confirm
#   against the caller, which is not recoverable from this copy.
#   Returns the URL string.
sub url {
    my $f = shift;
    $f =~ s{\@}{?};     # first '@' only
    $f =~ s{\\}{/}g;    # all backslashes
    return "http://$f";
}

# ----------------------------------------------------------------------
# FIXME: the rest of this script is damaged and is kept here commented out,
# verbatim, so that the intact code above compiles.
#
# In this copy the whole program had been joined onto the shebang line (so
# none of it ran), and text that looked like HTML tags/entities inside the
# regexes appears to have been stripped.  Visible symptoms:
#   - "s|| // |g" has an empty pattern (in Perl that means "reuse the last
#     successful pattern"); presumably it matched a line-break tag.
#   - "s/ / /g" replaces a space with a space; presumably it matched an
#     entity or a non-breaking space.
#   - "sub output" never closes: its tail, and the start of the main
#     program, are missing after "# $text =~ s/".
#   - "split ( /(.+)|i" and the m|...| patterns have lost their tag text.
# Restore these parts from an undamaged copy; do not guess the patterns.
#
# sub output {
#     my ( @p ) = @_ ;
#     my $text;
#     foreach $text (@p){
#         $text =~ s/^[^>]+>// ;
#         $text =~ s|| // |g ;
#         $text =~ s/<[^>]+>/ /g ;
#         $text =~ s/\n/ /g ;
#         $text =~ s/&#(\d+);/ chr($1) /eg ;
#         $text =~ s/…/ ... /g ;
#         $text =~ s/ / /g ;
#         $text =~ s/\s+/ /g ;
#         $text =~ s/(\[[A-Z]+\])\s*$//i;
#         # $text =~ s/; s/\n/ /g ; s|(.+)|i ;
#         # print "\n\n";
#         print ""; output ($h1); my @entry = split ( /(.+)|i ; ($title) = $e=~m|
#
# (.+)
#
# |i unless $title; next if $title =~ m |