#!/usr/bin/perl # Written by Adam Bernard 2010 # http://pseudomonas.dreamwidth.org # This is still being tested, and is not guaranteed not to set your computer on fire # or lose the data on your LJ or DW. # Please please please back up your data before using this. # I repeat, please please please back up your data before using this. # Please make bug reports to http://pseudomonas.dreamwidth.org/37645.html # You'll need to edit the user options below. # The action on crossposts should only be enabled if you have ONLY # EVER CROSSPOSTED to one account livejournal.com, and NEVER to any other sites/accounts. # This is because the script can't tell which account is which - it can only grab the first # one listed. # Runs through DW posts, finding LJ entries that have already been cross-posted or imported to DW, # and for some subset of them based on things like their visibility, length, whether they contain # polls, and so on, edit them so that their text is commented out (possibly with a snippet of the # original post), adding a note saying that the post has been moved to such-and-such # a DW post which is thataway. use strict;use utf8; use Data::Dumper; use POSIX; use Fcntl; use XMLRPC::Lite; use Digest::MD5 qw(md5_hex); use Storable qw(thaw); use HTML::Parser; # There are two authentication modes, Digest is faster, lower-bandwidth, but NOT secure # since your password is sent (digested) every request. To enable it, set $USE_DIGEST_AUTH to 1. # If you do this, you might consider CHANGING YOUR PASSWORD after running the script. my $USE_DIGEST_AUTH = 0; # The default mode, challenge-response is more secure, but makes twice as many server requests. # The files lj.pwd and dw.pwd contain the passwords for livejournal # and dreamwidth respectively. Plain text files. open PASS, "dw.pwd"; my $dw_pass = ; chomp $dw_pass; close PASS; open PASS, "lj.pwd"; my $lj_pass = ; chomp $lj_pass; close PASS; my $VERSION = "0.1"; # note - this should stay as alphanumerics ([A-Za-z0-9_] and full stop.) my $DEBUG = 0; my $DW; my $LJ; $DW->{RPC} = new XMLRPC::Lite; $DW->{RPC}->proxy("http://www.dreamwidth.org/interface/xmlrpc"); $DW->{user} = "pseudomoans"; # Dreamwidth username goes here $DW->{digest} = md5_hex($dw_pass); $LJ->{RPC} = new XMLRPC::Lite; $LJ->{RPC}->proxy("http://www.livejournal.com/interface/xmlrpc"); $LJ->{user} = "pseudomoans"; # Livejournal username goes here $LJ->{digest} = md5_hex($lj_pass); my $MAX_ARTICLES_TO_FETCH = 50; # can't be more than fifty. my $criteria = { # If one of these isn't enabled it won't do anything. INCLUDE_XPOSTED => 0, # only if livejournal is ONLY site ever crossposted to. INCLUDE_IMPORTED => 0, # Dates *must* be strings of the form '2010-09-26 12:50:00' # or undef to leave them unspecified. BEFORE_DATE => undef, # entries dated ON OR AFTER this date will be skipped AFTER_DATE => undef, # entries dated ON OR BEFORE this date will be skipped LONGER_THAN_CHARS =>undef, INCLUDE_WITH_POLLS => 1, MUST_BE_PUBLIC=>undef, MUST_BE_NON_PUBLIC=>undef, MUST_HAVE_NO_COMMENTS=>undef, USE_SNIPPET => 1, # include the first 20 words of each entry as a sumamry. # Don't enable both of these at once - only the "Lock" will take effect. DISABLE_COMMENTS => 0, LOCK_COMMENTS => 0, }; my $LABEL_TEXT = q{
This post has been moved to Dreamwidth here. You may wish to update your links to reflect this.} . q{
}; # some LJ posts may have autoformatting disabled, so any newlines should be specified with
# %%url%% will be replaced with the URL of the DW post. # End of user options. ############################################################ ############################################################ ############################################################ ############################################################ my $before = $criteria->{BEFORE_DATE}; my @list = (); while (1){ # get a list of the 50 (or whatever) most recent items on DW # or those before the cutoff # or those before the last list retrieved my $list_items = xmlrpc_call('LJ.XMLRPC.getevents', { 'selecttype' => "lastn", 'howmany' => $MAX_ARTICLES_TO_FETCH, 'ver' =>1, $before ? (beforedate => $before) : (), }, $DW); my @events = @{$list_items->{events}|| [] }; push @list, @events; for (@events ){print $_->{subject}, "\n" if $DEBUG} $before = $events[-1]->{eventtime}; last if $criteria->{AFTER_DATE} && $before le $criteria->{AFTER_DATE}; # we've got too many! don't worry, they get filtered in the next section. last if scalar @events < $MAX_ARTICLES_TO_FETCH; # if we didn't retrieve all 50, we must have finished sleep 1; } for my $dw_post (@list){ # Let's get checking the logic! # firstly, there needs to be a link to an LJ post, otherwise nothing to do # The post can be of two sorts - crossposted or imported. my $quicksummary = $dw_post->{subject} || get_snippet($dw_post->{event}, 1); print STDERR "$dw_post->{eventtime}: $quicksummary\n"; my $xpost_frozen = $dw_post->{props}->{xpostdetail}; my $xpost; $xpost = thaw($xpost_frozen) if defined $xpost_frozen; # $xpost is a hashref of the places that an entry has been crossposted. my $import_url = $dw_post->{props}->{import_source}; my $remote_itemid = url_to_itemid($import_url), "\n"; # The sort of link that this item has must be allowed in the options. next unless ( $criteria->{INCLUDE_IMPORTED} and $import_url =~ /livejournal.com/ and url_to_itemid($import_url) ) or ($criteria->{INCLUDE_XPOSTED} and exists $xpost->{1} and $xpost->{1}->{itemid} =~ /^\d+$/ ); # and the first site to crosspost is livejournal. # Check the date; my $post_date = $dw_post->{eventtime}; if ($criteria->{AFTER_DATE} && $post_date le $criteria->{AFTER_DATE}){ # too early! print STDERR "$post_date earlier than $criteria->{AFTER_DATE} - skipping!\n"; next; } if ($criteria->{BEFORE_DATE} && $post_date ge $criteria->{BEFORE_DATE}){ # too late print STDERR "$post_date later than $criteria->{BEFORE_DATE} - skipping!\n"; next; } # fetch the LJ post. We want to have done as much excluding of stuff as possible before # fetching it, to keep things fast and not chew up bandwidth. my $lj_id = url_to_itemid($import_url) || $xpost->{1}->{itemid}; my $lj_post = xmlrpc_call('LJ.XMLRPC.getevents', {'selecttype' => "one", itemid => $lj_id}, $LJ)->{events}[0]; unless ($lj_post){print STDERR "No post found for itemID $lj_id\n"; next; } print Dumper($lj_post) if $DEBUG; my $security = $lj_post->{security}; my $post_text = $lj_post->{event}; my $has_had_comments = $lj_post->{props}->{commentalter} ? 1 : 0; my $contains_poll = ($post_text =~ // ? 1 : 0); my $already_been_done = ($post_text =~ // ? 1 : 0); if ($already_been_done){print STDERR "Not going to re-zap entry\n"; next;} if ($criteria->{MUST_HAVE_NO_COMMENTS} and $has_had_comments){print STDERR "Entry has comments already\n"; next;} if ($contains_poll and not $criteria->{INCLUDE_WITH_POLLS}){print STDERR "Entry has a poll in already\n"; next;} # Security. $security is defined for non-public entries, blank for public ones. if ($security and $criteria->{MUST_BE_PUBLIC}){print STDERR "Not a public entry - skipping\n"; next;} if ((not $security) and $criteria->{MUST_BE_NON_PUBLIC}){print STDERR "A public entry - skipping\n"; next;} # length if (defined $criteria->{LONGER_THAN_CHARS} and length($post_text) <= $criteria->{LONGER_THAN_CHARS} ) { warn "Post length (", length($post_text),") is not longer than $criteria->{LONGER_THAN_CHARS}\n"; next; } my $dw_url = $dw_post->{url}; my $dw_itemid = $dw_post->{itemid}; my $dw_anum = $dw_post->{anum}; my $dw_ditemid = ($dw_itemid * 256) + $dw_anum; my $dw_comment_count_img = "http://www.dreamwidth.org/tools/commentcount?user=".$DW->{user}."&=$dw_ditemid"; my $datestamp = time(); my $label_text = $LABEL_TEXT; $label_text =~ s/%%url%%/$dw_url/g; # I'd suggest that if you fork the script you don't change $id_string to something that # other versions of the script won't pick up, in case a user runs two different # forks of the script on the same journal. Changing the $VERSION to something # distinctive (and alphanumeric) might work better. my $id_string = qq{}; # A snippet is the first 20 words, plus an ellipsis if this chops out a substantial amount of text. my $snippet = ""; if ($criteria->{USE_SNIPPET}) { $snippet = get_snippet(get_plain_text($post_text)) } my $subject = $lj_post->{subject}; my $replacement_entry = ($snippet. get_commented($post_text) . "
" .$label_text . $id_string); #die Dumper($lj_post); print STDERR "Editing LJ post... "; my $props = $lj_post->{props}; if ($criteria->{LOCK_COMMENTS}) { $props->{opt_lockcomments} = 1 } elsif ($criteria->{DISABLE_COMMENTS}) { $props->{opt_nocomments} = 1 } my $lj_edited = xmlrpc_call('LJ.XMLRPC.editevent', { itemid => $lj_id, event=> $replacement_entry, subject => $lj_post->{subject}, props => $props, security => $security, allowmask => $lj_post->{allowmask}, }, $LJ)->{events}[0]; print STDERR " Done!\n"; sleep 1; } END{print "\n";} sub get_snippet { my $orig = shift; my $plaintext = shift; $orig =~ s/[\n\s]+/ /g; my $s = $orig . " "; $s =~ s/^((?:.+?\s+){0,20}).*$/$1/; $s =~ s/\s$//; if ((length($orig) - length($s)) > 4 ){ $s .= $plaintext ? '[...]' :'[...]' } $s .= $plaintext ? "" : "
"; return $s; } sub get_plain_text { my $s = shift; my $t = ""; HTML::Parser->new(text_h => [sub {$t .= shift}, "text"])->parse($s)->eof; return $t; } sub get_commented { # This replaces text with a set of comments. Each comment block starts with a 1 or a 0. # 1 indicates the block was added as part of this commenting process # 0 indicates the comment block existed beforehand. # I hope that this will facilitate the use of a tool to undo the process. my $s = shift; my $t = ""; HTML::Parser->new( comment_h => [sub {$_[1] =~ s/$_[1]" }, "skipped_text, text"], end_document_h => [sub {$t .= ""}, "skipped_text"], )->parse($s)->eof; return $t. "\n"; } sub url_to_itemid { # A URL's numbery bit is itemid * 256 + random-security-number # We can get back to the itemid by taking this number, dividing by 256 # and discarding the remainder. # http://community.livejournal.com/support_interim/85008.html my $url = shift; return undef unless defined $url; $url =~ m{\/(\d+)\.html$} or return undef; my $jitemid = $1; return int($jitemid / 256); } sub xmlrpc_call { my ($method, $req, $acct) = @_; $req->{username} = $acct->{user}; if ($USE_DIGEST_AUTH){ $req->{auth_method} = 'clear'; $req->{hpassword} = $acct->{digest}; } else { # default - challenge/response auth my $get_chal = $acct->{RPC}->call("LJ.XMLRPC.getchallenge", {}); my $chal = $get_chal->result->{'challenge'}; print STDERR "chal: $chal\n" if $DEBUG; my $response = md5_hex($chal . $acct->{digest}); # add to the request hashref $req->{auth_method} = 'challenge'; $req->{auth_challenge} = $chal; $req->{auth_response} = $response; } my $res = $acct->{RPC}->call($method, $req); if ($res->fault) { print STDERR "Error:\n". " String: " . $res->faultstring . "\n" . " Code: " . $res->faultcode . "\n"; exit 1; } return $res->result; }