#!/usr/bin/perl
# linda macphee-cobb
# http://herselfswebtools.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
#
# This program backs up Blogger posts from your RSS feed.
#
# You must change http://www.blogger.com/feeds/999999999999/posts/default?max-results=300&alt=rss
# Replace the 9s with your blog id number. Log onto Blogger. Go to posts and you'll see
# BlogID= followed by a number in the URL bar of your browser. Plug that number in place of
# the 9s in the feed URL that is fetched further down in this script.
#######################################################
# includes and globals
#######################################################
#includes
use strict;
use warnings;
use File::Path qw(make_path);
use LWP::Simple;
# global variables
# NOTE: $content and $file are not referenced anywhere in this part of the
# script; they are kept only in case code elsewhere in the file uses them.
my $content;
my $file;
# other globals
# ***** !!! You must change the 999s to your blog id number !!! ***** #
my $feed_url = "http://www.blogger.com/feeds/999999999999/posts/default?max-results=300&alt=rss";

################################################################################
# helpers
################################################################################

# Return the text between the first <$tag>...</$tag> pair found in $xml, or
# undef when $xml is undefined or the element is not present.  The /s flag
# lets the element content span several lines.
sub _tag_text {
    my ($xml, $tag) = @_;
    return undef unless defined $xml;
    my ($text) = $xml =~ m{<\Q$tag\E>(.*?)</\Q$tag\E>}s;
    return $text;
}

# Undo one level of XML escaping so that an escaped post body becomes markup
# again.  Returns the decoded copy; the argument is not modified.
sub _xml_unescape {
    my ($text) = @_;
    $text =~ s/&lt;/</g;
    $text =~ s/&gt;/>/g;
    $text =~ s/&quot;/"/g;
    $text =~ s/&#39;/'/g;
    $text =~ s/&amp;/&/g;    # must be last: "&amp;lt;" has to become "&lt;", not "<"
    return $text;
}

################################################################################
# Fetch rss stream for posts
################################################################################
my $feed = get($feed_url);
die "Could not fetch feed $feed_url\n" unless defined $feed;

# There is one <item> element per post; collect the body of each one.
my @items = $feed =~ m{<item>(.*?)</item>}gs;
warn "No <item> elements found in $feed_url\n" unless @items;

# For each item pull out the pubDate, description and link, save the post to a
# file named after the link, then download every image the post references.
ITEM:
foreach my $item (@items) {
    my $date = _tag_text($item, 'pubDate');
    my $post = _tag_text($item, 'description');
    my $link = _tag_text($item, 'link');

    # The link doubles as the output path, so an item without one cannot be saved.
    unless (defined $link && length $link) {
        warn "Skipping an item that has no <link>\n";
        next ITEM;
    }
    $date = '' unless defined $date;
    $post = defined $post ? _xml_unescape($post) : '';

    $link =~ s{\Ahttps?://}{};    # remove the scheme, http or https
    $link =~ s/"//g;              # sometimes " are still on end of name

    # Split the link into directory components plus a file name.  The link comes
    # from remote data, so refuse anything that could climb out of the working
    # directory; empty components (doubled or leading slashes) are dropped.
    my @directories = grep { length } split m{/}, $link;
    if (!@directories || grep { $_ eq '.' || $_ eq '..' } @directories) {
        warn "Skipping item with unusable link '$link'\n";
        next ITEM;
    }
    my $file_name = pop @directories;    # what remains is the directory path

    # Posts are stored under the directory structure taken from the link.
    if (@directories) {
        my $directory = join '/', @directories;
        make_path($directory, { mode => 0755, error => \my $errors });
        if ($errors && @$errors) {
            my ($failed, $message) = %{ $errors->[0] };
            warn "Cannot create directory $directory: $message\n";
            next ITEM;
        }
    }

    # pull the title from the file name; only a trailing ".html" is removed
    (my $title = $file_name) =~ s/\.html\z//;

    # create the file on the computer and write to the file
    # NOTE(review): assumes get() returned decoded character data, as current
    # LWP::Simple does, so the text is written out as UTF-8 -- confirm for the
    # installed LWP version.
    my $post_path = join '/', @directories, $file_name;
    my $post_fh;
    unless (open $post_fh, '>:encoding(UTF-8)', $post_path) {
        warn "Cannot write $post_path: $!\n";
        next ITEM;
    }
    print {$post_fh} "\n $title ", "\n\n$date\n", "\n\n$post\n", "\n";
    close $post_fh or warn "Error closing $post_path: $!\n";

    # download any images from this post to this directory
    my @image_urls = $post =~ m/src="(.*?)"/g;
    foreach my $image_url (@image_urls) {
        # the image is saved under the last path component of its URL
        my $image_name = (split m{/}, $image_url)[-1];
        next unless defined $image_name && length $image_name;
        next if $image_name eq '.' || $image_name eq '..';

        # put images in proper directory
        my $image_path = join '/', @directories, $image_name;

        # getstore() writes the response body straight to the file and returns
        # the HTTP status code, so failed downloads can be reported.
        my $status = getstore($image_url, $image_path);
        warn "Could not download $image_url (HTTP status $status)\n"
            unless is_success($status);
    }
}