#!/usr/bin/perl
# linda macphee-cobb
# http://herselfswebtools.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
#
# This program backs up Blogger posts from your RSS feed.
#
# You must change the feed URL stored in $feed_url below:
#   http://www.blogger.com/feeds/999999999999/posts/default?max-results=300&alt=rss
# Replace the 9999s with your blog id number. Log onto blogger. Go to posts and you'll see
# BlogID= followed by a number in the URL bar of your browser. Just plug in that number and
# remove the 9s.
#
# For every <item> in the feed the script writes one file, named after the
# post's link (host/path/name.html, relative to the current directory), and
# downloads every image referenced by the post into the same directory.

#######################################################
# includes and globals
#######################################################

#includes
use strict;
use warnings;
use LWP::Simple;

# global variables
# Declared by the original script; nothing below reads them.
my $content;
my $file;

# other globals

################################################################################
# ***** !!! You must change the 999s to your blog id number !!! *****
################################################################################
my $feed_url = "http://www.blogger.com/feeds/999999999999/posts/default?max-results=300&alt=rss";

#######################################################
# helpers
#######################################################

# Return the text between the first <$tag> ... </$tag> pair found in $xml,
# or undef when the element is not present.
sub first_element {
    my ( $xml, $tag ) = @_;

    # /s lets the element body span several lines.
    my ($text) = $xml =~ m{<\Q$tag\E>(.*?)</\Q$tag\E>}s;
    return $text;
}

# Create each directory of @dirs nested inside the previous one, skipping
# those that already exist. Returns 1 on success, 0 (after a warning) as
# soon as one directory cannot be created.
sub ensure_directories {
    my (@dirs) = @_;

    my $path = "";
    foreach my $dir (@dirs) {
        $path .= $dir . "/";
        next if -d $path;
        mkdir( $path, 0755 ) or do {
            warn "Could not create directory $path: $!\n";
            return 0;
        };
    }
    return 1;
}

# Write $data to $path using the given open mode (for example '>:raw').
# Returns 1 on success, 0 (after a warning) on any failure.
sub write_file {
    my ( $path, $mode, $data ) = @_;

    open( my $fh, $mode, $path ) or do {
        warn "Could not open $path for writing: $!\n";
        return 0;
    };
    my $ok = print {$fh} $data;
    warn "Could not write to $path: $!\n" unless $ok;

    # Buffered write errors only surface when the handle is closed.
    close($fh) or do {
        warn "Could not close $path: $!\n";
        $ok = 0;
    };
    return $ok ? 1 : 0;
}

#######################################################
# Fetch rss stream for posts
#######################################################
my $feed = get($feed_url);
defined $feed
    or die "Could not fetch the feed from $feed_url\n";

# pull out all <item> elements, there is one for each post
my @items = $feed =~ m{<item>.*?</item>}gs;

# for each item pull out the pubDate, description, link and however many
# images there may be
ITEM:
foreach my $item (@items) {

    # pull posting date from item
    my $date = first_element( $item, 'pubDate' );
    $date = '' unless defined $date;

    # pull actual post from item
    my $post = first_element( $item, 'description' );
    $post = '' unless defined $post;

    # The feed carries the post body entity-escaped; turn the angle brackets
    # back into real ones so the saved page and the src= scan below see
    # actual markup.
    $post =~ s/&lt;/</g;    # replace &lt; with <
    $post =~ s/&gt;/>/g;    # replace &gt; with >

    # pull the link to this post from item; the link doubles as the file name
    my $link = first_element( $item, 'link' );
    unless ( defined $link && length $link ) {
        warn "Skipping an item that has no <link>\n";
        next ITEM;
    }
    $link =~ s{\Ahttps?://}{};    # remove http:// or https://
    $link =~ s/"//g;              # sometimes " are still on end of name

    # The link comes from remote data and is used as a local path: refuse
    # anything that could leave the current directory.
    my @directories = split m{/}, $link;
    my $escapes = ( $link =~ m{\A/} ) || scalar grep { $_ eq '..' } @directories;
    if ($escapes) {
        warn "Skipping item with unsafe path: $link\n";
        next ITEM;
    }

    # last path component is the file name, the rest are directories
    my $file_name = pop @directories;
    unless ( defined $file_name && length $file_name ) {
        warn "Skipping item with empty file name: $link\n";
        next ITEM;
    }

    # put each post in a directory by the month and year it was created,
    # creating the directories as needed
    ensure_directories(@directories) or next ITEM;

    # pull the title from the file name
    my $title = $file_name;
    $title =~ s/\.html\z//;

    # create the file on the computer and write to the file
    # NOTE(review): only whitespace surrounds the title, date and post in
    # these strings; if the saved page is meant to carry wrapping HTML
    # markup, it has to be added here.
    my $page = "\n $title "
        . "\n\n\n$date\n\n"
        . "\n\n\n$post\n\n"
        . "\n";

    # NOTE(review): assumes get() hands back decoded characters for the feed
    # (current LWP::Simple returns decoded_content) -- confirm for the
    # installed libwww-perl, otherwise use a plain '>' here.
    write_file( $link, '>:encoding(UTF-8)', $page );

    # download any images from this post to this directory
    my $directory_path = join( "/", @directories );
    my @image_urls = $post =~ m/src="(.*?)"/g;

    foreach my $image_url (@image_urls) {

        # the image name is the last component of its url
        my @url_parts  = split m{/}, $image_url;
        my $image_name = pop @url_parts;
        next unless defined $image_name && length $image_name;

        # put images in proper directory (the current one when the post has
        # no directory part, never the filesystem root)
        my $image_path =
            length $directory_path
            ? "$directory_path/$image_name"
            : $image_name;

        # download and save the image
        my $image = get($image_url);
        unless ( defined $image ) {
            warn "Could not download image $image_url\n";
            next;
        }

        # image data is binary, so keep the handle free of any translation
        write_file( $image_path, '>:raw', $image );
    }
}