#!/usr/bin/perl
# linda macphee-cobb
# http://herselfswebtools.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
#
# This program grabs your rss feed, sorts the posts by category ( label ) and creates
# an HTML file of links to all your posts sorted by category
# READ ME AND FOLLOW THE DIRECTIONS !
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# First log into the WordPress admin area
# go to Options -> Reading and set Syndication feeds to some number larger than the
# number of posts you have in your blog
#
# Second, change the line that sets $rss, for example
# my $rss = "http://yourdomain.com/wordpress/feed/?max-results=500";
# to the address of your own rss feed
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#######################################################
# includes and globals
#######################################################
#includes
use strict;
use LWP::Simple;
# Report questionable constructs (undefined values, wide characters, ...) from
# this point on instead of silently writing a broken archive.
use warnings;

# name of the html file the archive is written to
my $archive_file = "Wordpress Archive.html";

################################################################################
# helpers
################################################################################

# escape_html ( $text )
# Returns a copy of $text with & < > and " replaced by their HTML entities so
# the text can be placed safely inside HTML markup.
sub escape_html {
    my ( $text ) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# element_texts ( $xml, $tag )
# Returns the content of every <$tag>...</$tag> element found in $xml, in
# document order. Attributes on the opening tag are accepted, self-closing
# tags are skipped, and surrounding whitespace is trimmed.
# Content wrapped in a CDATA section is raw text, so it is HTML-escaped here;
# all other content is returned unchanged because XML text is already
# entity-escaped. Either way the result is ready to be embedded in HTML.
sub element_texts {
    my ( $xml, $tag ) = @_;
    my @texts;
    while ( $xml =~ m{<\Q$tag\E(?:\s[^>]*)?(?<!/)>(.*?)</\Q$tag\E\s*>}sg ) {
        my $text = $1;    # copy before the next match overwrites $1
        if ( $text =~ m{\A\s*<!\[CDATA\[(.*)\]\]>\s*\z}s ) {
            $text = escape_html ( $1 );
        }
        $text =~ s/\A\s+//;
        $text =~ s/\s+\z//;
        push @texts, $text;
    }
    return @texts;
}

# clean_link ( $link )
# Returns the post url in a form that is safe to use as an href value.
sub clean_link {
    my ( $link ) = @_;
    # yahoo rss specific: links are wrapped in a us.rd.yahoo.com redirect,
    # keep only the real url that follows it
    $link =~ s{\A[a-z]+://us\.rd\.yahoo\.com.*?(?=[a-z]+://)}{}i;
    # sometimes " are still on the end of the link; they would also end the
    # href attribute early
    $link =~ s/"//g;
    return $link;
}

################################################################################
# Fetch rss stream for posts
# The feed is fetched before the archive file is opened so that a failed
# download does not wipe out an archive written by an earlier run.
my $rss = "http://your-website-url.com/feed/";
my $feed = get ( $rss );
die "Could not fetch the rss feed from $rss\n" unless defined $feed;

# NOTE(review): this assumes get() hands back decoded characters rather than
# raw bytes (believed true for current libwww-perl releases - confirm on very
# old installs). Both output streams therefore encode explicitly as UTF-8.
binmode ( STDOUT, ":encoding(UTF-8)" );

# echo the raw feed to the terminal, as the script has always done
print $feed;

#############################################################################################
# pull out every <item> ( there is one for each post ) and file the post under
# each of its categories: category name => list of { title, link }
my %posts_in;
foreach my $item ( $feed =~ m{<item(?:\s[^>]*)?>(.*?)</item\s*>}sg ) {
    # only the first title and link of an item are used
    my ( $title ) = element_texts ( $item, "title" );
    my ( $link )  = element_texts ( $item, "link" );
    next unless defined $title and defined $link;

    $link = clean_link ( $link );
    # an untitled post would give an invisible link, show its address instead
    $title = $link if $title eq "";

    # a post is listed once under every category ( label ) it carries;
    # posts without any category do not appear in the archive
    my %seen;
    foreach my $category ( element_texts ( $item, "category" ) ) {
        next if $seen{$category}++;    # same label listed twice on one post
        push @{ $posts_in{$category} }, { title => $title, link => $link };
    }
}

################################################################################
# open html file to save the archive to
open ( my $archive, ">:encoding(UTF-8)", $archive_file )
    or die "Could not open '$archive_file' for writing: $!\n";

print {$archive} "<html>\n<head>\n",
    "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n",
    "<title>Your Blog Archive</title>\n</head>\n<body>\n";

# sort and print: one heading per category, followed by that category's posts
# in title order
foreach my $category ( sort keys %posts_in ) {
    print {$archive} "\n<h2>$category</h2>\n";
    my @posts = sort { $a->{title} cmp $b->{title} } @{ $posts_in{$category} };
    foreach my $post ( @posts ) {
        # print link
        print {$archive} "<a href=\"$post->{link}\">$post->{title}</a><br>\n";
        # one blank line per link on the terminal, as before
        print "\n";
    }
}

print {$archive} "\n</body>\n</html>\n";

# close the file; buffered write errors only show up here
close ( $archive )
    or die "Could not finish writing '$archive_file': $!\n";