#!/usr/bin/perl
# linda macphee-cobb
# http://herselfswebtools.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
#
# This program backs up websites from your rss feed.  It was written so
# I could back up my blogger blogs, but should work just fine with any website.
# This does not back up comments, just the posts rss feed.
# This program also does not yet back up your images.  I'll add that in later.
#
# This program grabs the rss feed from your blogger blog.
# It then grabs the link to each page and downloads the full html of that page,
# not just the post.
# But it does not yet grab the images.  This is only the first version.
#
# You can change $default_feed_url below (or pass a feed url as the first
# command line argument) to back up any website for which you publish an
# rss feed and it should work just fine.
#
# Each page is saved under the current directory at host/path/of/the/page,
# i.e. the page url with the leading http:// or https:// removed.  Pages that
# already exist on disk are not downloaded again.

######### to do ###################################
# download images for webpage
# download only post not full html for blogger
# does not yet fetch comments
########## end to do ################################

#######################################################
# includes and globals
#######################################################

#includes
use strict;
use warnings;

use File::Basename qw(dirname);
use File::Path qw(make_path);
use LWP::Simple;
use XML::RSS;

# global variables
my $content;    # text of the rss feed
my $file;       # currently unused

# other globals
my $rss = XML::RSS->new;

################################################################################
# Fetch rss stream for posts
################################################################################

# url to fetch
# !!!!!
# You must change 99999999999999 to your blog id number !
# To find your blog id number log onto blogger and go into posts
# At the top of your browser in the URL bar you will see blogID=
# The number after blogID= is your blogger id number
#
# You can also change max-results=500 to the number of posts you have if you
# have more than 500 posts
# !!!!!!
my $default_feed_url
    = "http://www.blogger.com/feeds/99999999999999/posts/default?max-results=500&alt=rss";

# A feed url given on the command line overrides the default above.
my $feed_url = @ARGV ? $ARGV[0] : $default_feed_url;

# get() returns undef on any failure; stop here rather than parse nothing.
$content = get($feed_url);
defined $content
    or die "Could not fetch rss feed $feed_url\n";

# sort it all out now
eval { $rss->parse($content); 1 }
    or die "Could not parse rss feed $feed_url: $@";

# while more links
ITEM:
foreach my $item ( @{ $rss->{'items'} } ) {

    #fetch link
    next ITEM unless defined( $item->{'title'} ) && defined( $item->{'link'} );

    my $url = $item->{'link'};

    #parse link: the local path is the url without its scheme
    my $path = $url;
    unless ( $path =~ s{\Ahttps?://}{}i ) {
        warn "Skipping $url: not an http or https link\n";
        next ITEM;
    }

    # A link that names a directory (ends in / or is just a host name) has no
    # file name to write to, so store it as index.html inside that directory.
    $path .= '/'          unless $path =~ m{/};
    $path .= 'index.html' if     $path =~ m{/\z};

    # The link comes from the feed, so never let it climb out of the
    # directory the backup is running in.
    if ( grep { $_ eq '..' } split m{/}, $path ) {
        warn "Skipping $url: path contains '..'\n";
        next ITEM;
    }

    #see if this file exists on our computer?  if so there is nothing to do
    next ITEM if -e $path;

    # does directory exist? if not create it (and any missing parents)
    my $directory = dirname($path);
    make_path( $directory, { mode => 0755, error => \my $errors } );
    if ( @{$errors} ) {
        foreach my $diagnostic ( @{$errors} ) {
            my ( $failed_path, $message ) = %{$diagnostic};
            warn "Could not create directory for $url: "
                . ( $failed_path eq '' ? $message : "$failed_path: $message" )
                . "\n";
        }
        next ITEM;
    }

    # download the html file straight into place.  getstore() writes the
    # response body as received, so the page is saved in whatever encoding
    # the server sent it in.
    # NOTE(review): https links presumably need LWP::Protocol::https
    # installed -- confirm on the machine this runs on.
    my $status = getstore( $url, $path );
    unless ( is_success($status) ) {
        warn "Could not download $url (status $status)\n";

        # Do not leave a stub behind: an existing file would make the next
        # run believe this page is already backed up.
        unlink $path if -e $path;
        next ITEM;
    }

    #download images !!! not yet done
}
#end while more links