#!/usr/bin/perl
# linda macphee-cobb
# http://herselfswebtools.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
#
# This program grabs your rss feed, sorts the posts by category ( label )
# and creates an HTML file of links to all your posts sorted by category.
#
# READ ME AND FOLLOW THE DIRECTIONS !
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Change the line below that reads
#   my $rss = "http://www.blogger.com/feeds/9999999999999999/posts/default?max-results=300&alt=rss";
# to the rss feed for your blog. You just have to change the blogger id
# number, which you will see in the location bar of your browser when you
# are editing posts.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

#######################################################
# includes and globals
#######################################################

use strict;
use warnings;
use LWP::Simple;

# Feed to read; edit the blogger id number for your own blog.
my $rss = "http://www.blogger.com/feeds/999999999999999999999/posts/default?max-results=300&alt=rss";

# HTML file the archive is written to (created in the current directory).
my $archive_file = "Blogger Archive.html";

################################################################################
# Fetch rss stream for posts
#
# The feed is fetched BEFORE the output file is opened, so a failed download
# does not truncate an archive left by a previous run.
my $feed = get($rss);
die "Could not fetch rss feed $rss\n" unless defined $feed;

################################################################################
# Collect one link per ( category, post ) pair.
#
# %links_for maps a category label to the list of <a> elements filed under it.
# Posts that carry no <category> element are not listed at all.
my %links_for;

# Every <item>...</item> element is one post. The opening tag is part of the
# pattern so the channel header ahead of the first post is not mistaken for
# post data.
ITEM:
foreach my $item ( $feed =~ m{<item\b[^>]*>.*?</item>}sg ) {

    # pull the link to the post
    my ($link) = $item =~ m{<link>(.*?)</link>}s;
    next ITEM unless defined $link;

    $link =~ s/"//g;         # sometimes " are still on end of name
    $link =~ s/\A\s+//;
    $link =~ s/\s+\z//;

    # Remember the scheme so https links stay https, then work on the bare
    # host/path part.
    my $scheme = 'http';
    if ( $link =~ s{\A(https?)://}{}i ) {
        $scheme = lc $1;
    }
    $link =~ s{us\.rd\.yahoo\.com.*?//}{}g;    # clean up link - yahoo rss specific
    next ITEM unless length $link;

    # pull title
    my ($title) = $item =~ m{<title\b[^>]*>(.*?)</title>}s;
    $title = '' unless defined $title;
    $title =~ s/<!\[CDATA\[//g;    # remove <![CDATA[
    $title =~ s/\]\]>//g;          # remove ]]> at end of title
    $title =~ s/\A\s+//;
    $title =~ s/\s+\z//;

    # An empty title would give an invisible, unclickable link; show the
    # address instead.
    $title = "$scheme://$link" unless length $title;

    my $anchor = "<a href=\"$scheme://$link\">$title</a>";

    # pull categories - a post is listed once under each of its labels
    foreach my $raw_label ( $item =~ m{<category\b[^>]*>(.*?)</category>}sg ) {
        my $label = $raw_label;
        $label =~ s/\A\s+//;
        $label =~ s/\s+\z//;
        push @{ $links_for{$label} }, $anchor;
    }
}

################################################################################
# Write the archive: categories in sorted order, links sorted within each.
open my $out, '>', $archive_file
    or die "Cannot open '$archive_file' for writing: $!\n";

print {$out} "<html>\n<head>\n<title>Your Blog Archive</title>\n</head>\n<body>\n";
print {$out} "<h1>Your Blog Archive</h1>\n";

foreach my $label ( sort keys %links_for ) {

    # category heading between two rules
    print {$out} "\n<br><hr>";
    print {$out} "$label\n<hr>\n";

    # links filed under this category
    foreach my $anchor ( sort @{ $links_for{$label} } ) {
        print {$out} "$anchor \n";
    }
}

print {$out} "\n</body>\n</html>";

# Buffered write errors only surface at close, so check it.
close $out or die "Error writing '$archive_file': $!\n";